diff --git a/README.md b/README.md index 2e73dc27600a27e843171099d2506af0114f9958..8211bbea11a6600af683a9429358b21906c70d53 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ HPVM is currently at version 0.5. For more about what HPVM is, see [our website] The following components are required to be installed on your machine to build HPVM. * GCC (>=5.1.0) -* CMake (>=3.4.3) +* CMake (>=3.17.0) * Python (>=2.7) * GNU Make (>=3.79.1) * OpenCL (>=1.0.0) or CUDA (>=9.1, only required for GPU support) @@ -49,6 +49,17 @@ git clone https://gitlab.engr.illinois.edu/llvm/hpvm-release.git/ cd hpvm-release/hpvm ``` +Before installing HPVM, some paths must be set for installation to succeed. The following variables in set_paths.sh must be set: + +* CUDA_TOOLKIT_PATH --- Path to the CUDA toolkit +* CUDA_INCLUDE_PATH --- Path to the CUDA headers +* CUDA_LIB_PATH -- Path to CUDA libraries + +Once the aforementioned variables in set_paths.sh have been specified, run the script. +```shell +source set_paths.sh +``` + HPVM installer script can be used to download, configure and build HPVM along with LLVM and Clang. 
```shell bash install.sh diff --git a/hpvm/include/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h b/hpvm/include/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h index ce6725b930cefb56d23cad2799bee27c97e44783..a76b63caa4897de2aa6fe358774e32835b809eae 100644 --- a/hpvm/include/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h +++ b/hpvm/include/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h @@ -138,7 +138,7 @@ public: FindFusionTargetsTraversal(Module &_M, builddfg::BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) { -/* FPs[hpvm::PROMISE_TARGET] = { {Intrinsic::visc_tensor_conv, +/* FPs[hpvm::TENSOR_TARGET] = { {Intrinsic::visc_tensor_conv, Intrinsic::hpvm_tensor_add, Intrinsic::hpvm_tensor_relu, Intrinsic::hpvm_tensor_pooling diff --git a/hpvm/include/SupportHPVM/DFG2LLVM.h b/hpvm/include/SupportHPVM/DFG2LLVM.h index 533cad17aae26b7006d16efada7378d83a9bc840..fb1e35033eda0445f10423beb69aab5f07c093f0 100644 --- a/hpvm/include/SupportHPVM/DFG2LLVM.h +++ b/hpvm/include/SupportHPVM/DFG2LLVM.h @@ -174,7 +174,7 @@ bool CodeGenTraversal::checkPreferredTarget(DFNode *N, hpvm::Target T) { case hpvm::CPU_TARGET: HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu"); break; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise"); break; default: @@ -211,7 +211,7 @@ bool CodeGenTraversal::preferredTargetIncludes(DFNode *N, hpvm::Target T) { case hpvm::CUDNN_TARGET: HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_cudnn")); break; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: HintNode.push_back(M->getOrInsertNamedMetadata("hpvm_hint_promise")); break; case hpvm::CPU_OR_GPU_TARGET: diff --git a/hpvm/include/SupportHPVM/DFGraph.h b/hpvm/include/SupportHPVM/DFGraph.h index 5674aa4fc67665d9db208e317b0d936803de3c82..3da7c0b01a79d52f668795eb072fdcb6381813a9 100644 --- a/hpvm/include/SupportHPVM/DFGraph.h +++ b/hpvm/include/SupportHPVM/DFGraph.h @@ -357,7 +357,7 @@ public: case hpvm::GPU_TARGET: GenFuncInfo.gpu_hasCPUFunc = 
isCPUFunc; break; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: GenFuncInfo.promise_hasCPUFunc = isCPUFunc; break; case hpvm::CUDNN_TARGET: @@ -382,7 +382,7 @@ public: return GenFuncInfo.gpu_hasCPUFunc; case hpvm::CUDNN_TARGET: return GenFuncInfo.cudnn_hasCPUFunc; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: return GenFuncInfo.promise_hasCPUFunc; case hpvm::CPU_OR_GPU_TARGET: assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n"); @@ -419,7 +419,7 @@ public: GenFuncs.CUDNNGenFunc = F; GenFuncInfo.cudnn_hasCPUFunc = isCPUFunc; break; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: if (GenFuncs.PROMISEGenFunc != NULL) { DEBUG(errs() << "Warning: Second generated PROMISE function for node " << FuncPointer->getName() << "\n"); @@ -447,7 +447,7 @@ public: return GenFuncs.GPUGenFunc; case hpvm::CUDNN_TARGET: return GenFuncs.CUDNNGenFunc; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: return GenFuncs.PROMISEGenFunc; case hpvm::CPU_OR_GPU_TARGET: assert(false && @@ -475,7 +475,7 @@ public: GenFuncs.CUDNNGenFunc = NULL; GenFuncInfo.cudnn_hasCPUFunc = false; break; - case hpvm::PROMISE_TARGET: + case hpvm::TENSOR_TARGET: GenFuncs.PROMISEGenFunc = NULL; GenFuncInfo.promise_hasCPUFunc = false; break; diff --git a/hpvm/include/SupportHPVM/HPVMHint.h b/hpvm/include/SupportHPVM/HPVMHint.h index 7677b01ae2b05e74c6d0609df9ea5bbaf6e14ea9..25020e82016b8b3320abb8ddf94b78f24bc91acd 100644 --- a/hpvm/include/SupportHPVM/HPVMHint.h +++ b/hpvm/include/SupportHPVM/HPVMHint.h @@ -20,7 +20,7 @@ enum Target { CPU_TARGET, GPU_TARGET, CUDNN_TARGET, - PROMISE_TARGET, + TENSOR_TARGET, CPU_OR_GPU_TARGET, // ALL_TARGETS, NUM_TARGETS diff --git a/hpvm/include/SupportHPVM/HPVMUtils.h b/hpvm/include/SupportHPVM/HPVMUtils.h index ff47bc0fe494a232b4b8438a0babd8bd6a507aef..9a91494a41d6109cda8a8b9b885919fd197fb768 100644 --- a/hpvm/include/SupportHPVM/HPVMUtils.h +++ b/hpvm/include/SupportHPVM/HPVMUtils.h @@ -395,27 +395,27 @@ bool 
tagIncludesTarget(hpvm::Target Tag, hpvm::Target T) { return true; return false; case hpvm::CUDNN_TARGET: - if (T == hpvm::CUDNN_TARGET) - return true; - return false; - case hpvm::PROMISE_TARGET: - if (T == hpvm::PROMISE_TARGET) - return true; - return false; + if (T == hpvm::CUDNN_TARGET) + return true; + return false; + case hpvm::TENSOR_TARGET: + if (T == hpvm::TENSOR_TARGET) + return true; + return false; default: assert(false && "Unknown Target\n"); } } bool isSingleTargetTag(hpvm::Target T) { - return ((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET) || - (T == hpvm::CUDNN_TARGET) || (T == hpvm::PROMISE_TARGET)); + return ((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET) + || (T == hpvm::CUDNN_TARGET) || (T == hpvm::TENSOR_TARGET)); } // Add the specified target to the given tag hpvm::Target getUpdatedTag(hpvm::Target Tag, hpvm::Target T) { - assert(((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET) || - (T == hpvm::CUDNN_TARGET) || (T == hpvm::PROMISE_TARGET)) && + assert(((T == hpvm::CPU_TARGET) || (T == hpvm::GPU_TARGET) + || (T == hpvm::CUDNN_TARGET) || (T == hpvm::TENSOR_TARGET)) && "The target is only allowed to be a single target: CPU, GPU, SPIR, " "CUDNN, PROMISE\n"); @@ -423,25 +423,22 @@ hpvm::Target getUpdatedTag(hpvm::Target Tag, hpvm::Target T) { case hpvm::None: return T; case hpvm::CPU_TARGET: - assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::PROMISE_TARGET) && - "Unsupported target combination\n"); + assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::TENSOR_TARGET) && "Unsupported target combination\n"); if (T == hpvm::CPU_TARGET) return hpvm::CPU_TARGET; if (T == hpvm::GPU_TARGET) return hpvm::CPU_OR_GPU_TARGET; return T; case hpvm::GPU_TARGET: - assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::PROMISE_TARGET) && - "Unsupported target combination\n"); + assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::TENSOR_TARGET) && "Unsupported target combination\n"); if (T == hpvm::CPU_TARGET) return hpvm::CPU_OR_GPU_TARGET; if (T == hpvm::GPU_TARGET) 
return hpvm::GPU_TARGET; return T; case hpvm::CPU_OR_GPU_TARGET: - assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::PROMISE_TARGET) && - "Unsupported target combination\n"); - return hpvm::CPU_OR_GPU_TARGET; + assert((T != hpvm::CUDNN_TARGET) && (T != hpvm::TENSOR_TARGET) && "Unsupported target combination\n"); + return hpvm::CPU_OR_GPU_TARGET; default: assert(false && "Unknown Target\n"); } @@ -471,14 +468,14 @@ void addHint(Function *F, hpvm::Target T) { HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu_gpu"); break; case hpvm::CUDNN_TARGET: - DEBUG(errs() << "CUDNN Target\n"); - HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn"); - break; - case hpvm::PROMISE_TARGET: - DEBUG(errs() << "PROMISE Target\n"); - errs() << "PROMISE\n"; - HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise"); - break; + DEBUG(errs() << "CUDNN Target\n"); + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn"); + break; + case hpvm::TENSOR_TARGET: + DEBUG(errs() << "PROMISE Target\n"); + errs() << "PROMISE\n"; + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise"); + break; default: llvm_unreachable("Unsupported Target Hint!"); break; @@ -510,11 +507,11 @@ void removeHint(Function *F, hpvm::Target T) { HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cpu"); break; case hpvm::CUDNN_TARGET: - HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn"); - break; - case hpvm::PROMISE_TARGET: - HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise"); - break; + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn"); + break; + case hpvm::TENSOR_TARGET: + HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise"); + break; default: llvm_unreachable("Unsupported Target Hint!"); break; @@ -565,7 +562,7 @@ hpvm::Target getPreferredTarget(Function *F) { if (FoundPrefTarget("hpvm_hint_cudnn")) return hpvm::CUDNN_TARGET; if (FoundPrefTarget("hpvm_hint_promise")) - return hpvm::PROMISE_TARGET; + return hpvm::TENSOR_TARGET; return hpvm::None; } diff --git 
a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index bb044cd756883449dc775fd7742ac575aa5815a1..b18cd4551ba33e0c315a416164b45e6282098aeb 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -5,9 +5,6 @@ add_subdirectory(DFG2LLVM_CPU) add_subdirectory(GenHPVM) add_subdirectory(LocalMem) add_subdirectory(DFG2LLVM_WrapperAPI) -add_subdirectory(ReplaceIntrinsics) add_subdirectory(DFG2LLVM_CUDNN) -add_subdirectory(ExtractHPVMLeafNodes) add_subdirectory(FuseHPVMTensorNodes) -add_subdirectory(InlineTensorCalls) add_subdirectory(InPlaceDFG) diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp index a1cdfc113d3384f54836b0da715a9f42d1058486..104b667fa76abac9eeb33cf82e6d4fdcd7734cb8 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp @@ -1465,15 +1465,35 @@ void CGT_CPU::codeGen(DFLeafNode *N) { Ftmp = addIdxDimArgs(Ftmp); } - N->setTag(hpvm::None); - N->removeGenFuncForTarget(hpvm::PROMISE_TARGET); - N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true); - N->setTag(hpvm::CPU_TARGET); - break; - } - default: { - break; - } + N->removeGenFuncForTarget(hpvm::CUDNN_TARGET); + N->setTag(hpvm::None); + N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true); + N->setTag(hpvm::CPU_TARGET); + break; + } + case hpvm::TENSOR_TARGET: + { + errs() << "Promise hint found. 
Store PROMISE function as CPU function.\n"; + // Make sure there is a generated x86 function for promise + assert(N->getGenFuncForTarget(hpvm::TENSOR_TARGET) && ""); + assert(N->hasCPUGenFuncForTarget(hpvm::TENSOR_TARGET) && ""); + // Store the PROMISE x86 function as the CPU generated function + Function *Ftmp = N->getGenFuncForTarget(N->getTag()); + // after adding the required number of arguments + if (!N->getParent()->isChildGraphStreaming()) { + Ftmp = addIdxDimArgs(Ftmp); + } + + N->setTag(hpvm::None); + N->removeGenFuncForTarget(hpvm::TENSOR_TARGET); + N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true); + N->setTag(hpvm::CPU_TARGET); + break; + } + default: + { + break; + } } return; diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp index 4adde2f2b7f7c0be11d65dd1c2f5086b397f7f65..b400c12021d2df712ea0bbd04f03dbe8724abc75 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp @@ -27,6 +27,8 @@ #include "SupportHPVM/DFG2LLVM.h" #include "InPlaceDFG/InPlaceDFGAnalysis.h" +#include "Config.h" + #include <sstream> #include <fstream> @@ -1325,7 +1327,7 @@ void CGT_WrapperAPI::initRuntimeAPI() { GlobalValue::ExternalLinkage, ConstArray2, ""); Constant *ConfsGEPConst = ConstantExpr::getGetElementPtr( GV2->getType()->getPointerElementType(), GV2, GEPIndices); - ArrayRef<Value *> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst}; + Value *RTCInitArgs[] = {ConfsGEPConst, QRangesGEPConst}; CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall); @@ -1367,7 +1369,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode *N) { // Look up if we have visited this function before. If we have, then just // get the cloned function pointer from DFNode. Otherwise, create the cloned // function and add it to the DFNode GenFunc. 
- Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::PROMISE_TARGET); + Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::TENSOR_TARGET); assert((F_wrapper_api == NULL) && "Error: Visiting a node for which code already generated"); @@ -1381,7 +1383,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode *N) { F_wrapper_api->removeFromParent(); M.getFunctionList().push_back(F_wrapper_api); - N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true); + N->addGenFunc(F_wrapper_api, hpvm::TENSOR_TARGET, true); /* Removing HPVM in/out/inout function attributes */ for (Function::arg_iterator ai = F_wrapper_api->arg_begin(), diff --git a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/CMakeLists.txt b/hpvm/lib/Transforms/ExtractHPVMLeafNodes/CMakeLists.txt deleted file mode 100644 index bb943f9100e628c87c865dfbdce80fc094ebb23e..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_library( ExtractHPVMLeafNodes - MODULE - ExtractHPVMLeafNodes.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) - diff --git a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.cpp b/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.cpp deleted file mode 100644 index 031503adeddd6c070ca06f3012fa0c2e5362f92c..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.cpp +++ /dev/null @@ -1,248 +0,0 @@ -//===------------------- ExtractHPVMLeafNodeGenFunctions.cpp -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ExtractHPVMLeafNodes" - -#include "llvm/Support/SourceMgr.h" -#include "llvm/Pass.h" - -#include "SupportHPVM/DFGTreeTraversal.h" -#include "ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.h" - -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Support/ToolOutputFile.h" -#include "llvm/Support/FileSystem.h" - -using namespace llvm; -using namespace builddfg; -using namespace extracthpvmleaf; -using namespace dfg2llvm; - -namespace { - -class PrintLeafNodes : public DFGTreeTraversal { - public: - virtual void process(DFInternalNode* N) override; - virtual void process(DFLeafNode* N) override; - - // Constructor - PrintLeafNodes(Module &_M, BuildDFG &_DFG) : DFGTreeTraversal(_M, _DFG) {} - -}; - -} - -void PrintLeafNodes::process(DFInternalNode* N) { - DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n"); - return; // nothing to do -} - -void PrintLeafNodes::process(DFLeafNode* N) { - DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n"); - if((N->isDummyNode())) { - DEBUG(errs() << "Skipping Dummy Node: " << N->getFuncPointer()->getName() << "\n"); - return; - } - - // Find function generated for node - Function *F = N->getGenFuncForTarget(hpvm::CPU_TARGET); - assert(F != NULL - && "This pass is invoked after code generation for x86 is completed.\nFound leaf node for which code generation has not happened!\n"); - assert(N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) && - "The generated function from x86 pass is not an x86 function\n"); - - std::string module_name = std::string("./build/") + std::string(F->getName().str().c_str()) + std::string("_module.ll"); - Twine tw(module_name); - // Create a new module for the node function - //Twine tw = Twine(F->getName()).concat(Twine("_module.ll")); - Module *m = new Module(tw.str(), 
F->getParent()->getContext()); - // Create a new function for F. It will be written to a new module. - ValueToValueMapTy VMap; - Function *ClonedF = CloneFunction(F, VMap); - // Remove it from current module - ClonedF->removeFromParent(); - // Insert it to the newly created module for it - m->getFunctionList().push_back(ClonedF); - - std::vector<Instruction*> ItoRemove; - - for (inst_iterator i = inst_begin(ClonedF), e = inst_end(ClonedF); i != e; ++i) { - Instruction *I = &(*i); - errs() << *I << "\n"; - - if (CallInst *CI = dyn_cast<CallInst>(I)) { - errs() << "Found call instruction\n"; - - Function *CalledF = CI->getCalledFunction(); - StringRef CallName = CalledF->getName(); - errs() << "CallName: " << CallName << "\n"; - -// if (CallName.startswith("llvm_hpvm")) { //TODO - if ((CallName.startswith("llvm_hpvm")) || (CallName.startswith("tensor"))) { //TODO -// errs() << "This is an HPVM runtime call. Include its declaration.\n"; - errs() << "This is an HPVM runtime call or tensor. Include its declaration.\n"; - - FunctionType *CalledFType = CalledF->getFunctionType(); - - std::vector<Value*> Fargs; - for (unsigned argno = 0; argno < CI->getNumArgOperands(); argno++) { - Fargs.push_back(CI->getArgOperand(argno)); - } - Function *FDecl = dyn_cast<Function>((m->getOrInsertFunction(CallName, CalledFType)).getCallee()); - CallInst *NewCI = CallInst::Create(CalledFType, FDecl, Fargs, CallName, CI); - errs() << "NewCI: " << *NewCI << "\n"; - CI->replaceAllUsesWith(NewCI); - ItoRemove.push_back(CI); - } - } - } - - for (unsigned i = 0; i < ItoRemove.size() ; i++) { - ItoRemove[i]->eraseFromParent(); - } - - ItoRemove.clear(); - - // Print new module - legacy::PassManager Passes; - - errs() << "Writing to File --- " << tw.str() << "\n"; - std::error_code EC; - ToolOutputFile Out(tw.str(), EC, sys::fs::F_None); - if (EC) { - errs() << EC.message() << '\n'; - } - - Passes.add(createPrintModulePass(Out.os())); - Passes.run(*m); - // Declare success. 
- Out.keep(); - - // Any call that is to F, needs to call the new external function - // Edit initial module to do so - // This is the name with which the function is called now - StringRef FName = ClonedF->getName(); - FunctionType *FType = F->getFunctionType(); - - // This is a node function, so it is only called through the dataflow graph - assert(F->hasOneUse() && "F is an HPVM node function\n"); - -/* - errs() << "F uses: " << F->getNumUses() << "\n" ; - for(Value::user_iterator ui = F->user_begin(), - ue = F->user_end(); ui!=ue; ++ui) { - errs() << "use : "<< **ui << "\n"; - } -*/ - - // Get the parent node's generated x86 function - DFInternalNode *ParentNode = N->getParent(); - Function *PGenF = ParentNode->getGenFuncForTarget(hpvm::CPU_TARGET); - assert(PGenF != NULL - && "This pass is invoked after code generation for x86 is completed.\nFound node for which code generation has not happened!\n"); - assert(ParentNode->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) && - "The generated function from x86 pass is not an x86 function\n"); - - for (inst_iterator i = inst_begin(PGenF), e = inst_end(PGenF); i != e; ++i) { - Instruction *I = &(*i); - errs() << *I << "\n"; - - if (CallInst *CI = dyn_cast<CallInst>(I)) { - errs() << "Found call instruction\n"; - - StringRef CallName = CI->getCalledFunction()->getName(); - errs() << "CallName: " << CallName << "\n"; - errs() << "F->getName(): " << F->getName() << "\n"; - - if (CallName == F->getName()) { - // Found the call to the leaf node function we moved to the other module. 
- // Replace the call - std::vector<Value*> Fargs; - for (unsigned argno = 0; argno < CI->getNumArgOperands(); argno++) { - Fargs.push_back(CI->getArgOperand(argno)); - } - Function *FDecl = dyn_cast<Function>(M.getOrInsertFunction(FName, FType).getCallee()); - CallInst *NewCI = CallInst::Create(FType, FDecl, Fargs, FName, CI); - errs() << "NewCI: " << *NewCI << "\n"; - CI->replaceAllUsesWith(NewCI); - ItoRemove.push_back(CI); - } - } - } - - for (unsigned i = 0; i < ItoRemove.size() ; i++) { - ItoRemove[i]->eraseFromParent(); - } - - // Clean up - ClonedF->eraseFromParent(); - delete m; - - F->replaceAllUsesWith(UndefValue::get(F->getType())); - F->eraseFromParent(); - - return; -} - -void ExtractHPVMLeafNodeFunctions::run(Module &M, BuildDFG &DFG) { - - errs() << "\nEXTRACT HPVM LEAF NODE FUNCTIONS PASS\n"; - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Graph Traversal - PrintLeafNodes *LeafVisitor = new PrintLeafNodes(M, DFG); - - // Iterate over all the DFGs - // Analyse the edges for parameters that are valid to be used in place - for (auto rootNode: Roots) { - LeafVisitor->visit(rootNode); - } - - delete LeafVisitor; - return; -} - -namespace { -struct ExtractHPVMLeafNodeGenFunctionsWrapper : public ModulePass { - static char ID; - ExtractHPVMLeafNodeGenFunctionsWrapper() : ModulePass(ID) {} - - bool runOnModule(Module &) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; -} // end anonymous namespace - -void ExtractHPVMLeafNodeGenFunctionsWrapper::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); -} - -bool ExtractHPVMLeafNodeGenFunctionsWrapper::runOnModule(Module &M) { - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - ExtractHPVMLeafNodeFunctions ELNF; - ELNF.run(M, DFG); - - return false; -} - -char ExtractHPVMLeafNodeGenFunctionsWrapper::ID = 0; -static 
RegisterPass<ExtractHPVMLeafNodeGenFunctionsWrapper> X( - "hpvm-extract-leaf-gen", - "Pass to extract leaf nodes to modules in HPVM", - false /* does not modify the CFG */, -true /* transformation, not just analysis */); - diff --git a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.exports b/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.exports deleted file mode 100644 index 139597f9cb07c5d48bed18984ec4747f4b4f3438..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/ExtractHPVMLeafNodes.exports +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/LLVMBuild.txt b/hpvm/lib/Transforms/ExtractHPVMLeafNodes/LLVMBuild.txt deleted file mode 100644 index 73ac540f06e86e9e7f0201b993d2c1e11270158e..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ExtractHPVMLeafNodes/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ExtractHPVMLeafNodes -parent = Transforms diff --git a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp index d27c6d9dce977c68d24f30e8b6db159153b57e7b..131a291a5b5a5f153985239effb97f5cf7f8e049 100644 --- a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp +++ b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp @@ -812,7 +812,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { } errs() << "THIS IS NOT A DUMMY NODE\n"; errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n"; - if (!preferredTargetIncludes(N, hpvm::PROMISE_TARGET)) { + if(!preferredTargetIncludes(N, hpvm::TENSOR_TARGET)) { // Only fuse if we plan to target PROMISE/Layers API // The CUDNN backend would be able to generate calls for the fused node, // but not the other way around diff --git a/hpvm/lib/Transforms/InlineTensorCalls/CMakeLists.txt b/hpvm/lib/Transforms/InlineTensorCalls/CMakeLists.txt deleted file mode 100644 index 29dd2c8431362b28a1d5683eadb2c9eb867696ff..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/InlineTensorCalls/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_library( InlineTensorCalls - MODULE - InlineTensorCalls.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) - diff --git a/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.cpp b/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.cpp deleted file mode 100644 index d31434341cf65939768d0acb7a0051d453909971..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//=== 
InlineApproxHPVMCalls.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "INLINE_APPROXHPVM_CALLS" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" - -#include "llvm/IR/InstIterator.h" - -#include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/InlineCost.h" - -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/IR/CallSite.h" -#include "llvm/ADT/SetVector.h" -#include <sstream> - -using namespace llvm; - - -namespace { - - struct InlineApproxHPVMCalls : public ModulePass { - static char ID; // Pass identification, replacement for typeid - InlineApproxHPVMCalls() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { - - InlineFunctionInfo IFI; - SmallSetVector<CallSite, 16> Calls; - bool Changed = false; - SmallVector<Function *, 16> InlinedFunctions; - for (Function &F : M){ - if (!F.isDeclaration() && F.getName().startswith("tensor") ) { - //errs()<<"Function = "<<*&F<<"\n"; - Calls.clear(); - - for (User *U : F.users()) - if (auto CS = CallSite(U)) - if (CS.getCalledFunction() == &F) - Calls.insert(CS); - - for (CallSite CS : Calls) - // FIXME: We really shouldn't be able to fail to inline at this point! - // We should do something to log or check the inline failures here. 
- Changed |= InlineFunction(CS, IFI); - - } - } - - return true; - } - - }; - - -} // End of namespace - -char InlineApproxHPVMCalls::ID = 0; -static RegisterPass<InlineApproxHPVMCalls> X("inline-tensor-calls", - "Inline ApproxHPVM tensor library function calls (CPU version)", - true /* modifies the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.exports b/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.exports deleted file mode 100644 index 139597f9cb07c5d48bed18984ec4747f4b4f3438..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/InlineTensorCalls/InlineTensorCalls.exports +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/hpvm/lib/Transforms/InlineTensorCalls/LLVMBuild.txt b/hpvm/lib/Transforms/InlineTensorCalls/LLVMBuild.txt deleted file mode 100644 index c160516a6477d367893495e39f5fd4d00366f6f0..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/InlineTensorCalls/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = InlineTensorCalls -parent = Transforms diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/CMakeLists.txt b/hpvm/lib/Transforms/ReplaceIntrinsics/CMakeLists.txt deleted file mode 100644 index 460aabcc27b51a2d94dabee3e9c4c60d14803ea9..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ReplaceIntrinsics/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_library( ReplaceIntrinsics - MODULE - ReplaceIntrinsics.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) - diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/LLVMBuild.txt b/hpvm/lib/Transforms/ReplaceIntrinsics/LLVMBuild.txt deleted file mode 100644 index 95739b3d4d1c3a68cc5014dc85fb26d3b1fc6ac5..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ReplaceIntrinsics/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ReplaceIntrinsics -parent = Transforms diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp deleted file mode 100644 index 45ad0ece23568a41fbf532b92918a582ebbae505..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp +++ /dev/null @@ -1,495 +0,0 @@ -//=== ReplaceApproxHPVMIntrinsicsWithFCalls.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "REPLACE_APPROXHPVM_INTRINSICS_WITH_FCALLS" - -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm-c/Core.h" - -#include "SupportHPVM/DFG2LLVM.h" -#include "InPlaceDFG/InPlaceDFGAnalysis.h" - -#include <sstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; - -// TODO: We still need in place analysis, if calls have the same interface -using namespace inplacedfg; - -namespace { -// Helper class declarations - -// Replace ApproxHPVM intrinsics with LLVM function calls. -// aiming to go through the CPU backend code generation. 
- -struct DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls() : DFG2LLVM(ID) {} - -private: -public: - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addRequired<InPlaceDFGAnalysisWrapper>(); - AU.addPreserved<BuildDFG>(); - AU.addPreserved<InPlaceDFGAnalysisWrapper>(); - } - - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_ReplaceApproxHPVMIntrinsicsWithFCalls : public CodeGenTraversal { - -private: - // Member variables - InPlaceDFGAnalysis::InPlaceDFGParameter *IPP; - - // VISC Runtime API and Tensor runtime API - - /* TODO: I believe that TensorRt is not needed, since we will have llvm - implementations linked in, so init and cleanup calls can be removed and - relevant code also, but I leave in in for now until verified. */ - FunctionCallee llvm_hpvm_initTensorRt; - FunctionCallee llvm_hpvm_cleanupTensorRt; - // Constant* hpvm_request_tensor; DONE: request tensor will not be used - - // Functions - bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N); - - // Virtual Functions - void init(); - void initRuntimeAPI(); - void codeGen(DFInternalNode *N); - void codeGen(DFLeafNode *N); - -public: - // Constructor - CGT_ReplaceApproxHPVMIntrinsicsWithFCalls( - Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) - : CodeGenTraversal(_M, _DFG), IPP(&_IPP) { - initRuntimeAPI(); - } -}; - -bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls:: - isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N) { - // We only expect the if branch to be taken - if (Argument *Arg = dyn_cast<Argument>(Op)) { - DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n"); - assert((Arg->getParent() == Fgen) && - "Extra Parameter in body of Function\n"); - // Candidae parameter is a function 
argument - // In this case, consult the result of in place analysis - // Find position in arg list - unsigned pos = Arg->getArgNo(); - // If this parameter cannot be used for in place operation - // code gen cannot continue - if (IPP->at(N)[pos]) { - DEBUG(errs() << *Arg << "\t: argument, suitable for in place\n"); - return true; - } else { - DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n"); - return false; - } - } else { - // If it is not an argument, then it needs to be the result of - // another intrinsic. These are new objects that are allocated, - // and consumed by next intrinsic. Alternatively, the intrinsic - // could have been replaced by a call to an LLVM function. - // We do not expect a merge pass to have run before the replacement pass, - // therefore we do not expect to go in the else branch. - DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n"); - if (dyn_cast<IntrinsicInst>(Op)) { - DEBUG(errs() << *Arg << "\t: local, suitable for in place\n"); - return true; - } else if (CallInst *CI = dyn_cast<CallInst>(Op)) { - if ((CI->getCalledFunction()->getName()).startswith("tensor")) - return true; - else - return false; - } else { - DEBUG(errs() << *Arg << "\t: local, not suitable for in place\n"); - return false; - } - } -} - -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::init() {} - -// Initialize the VISC runtime API. 
This makes it easier to insert these calls -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext()); - if (runtimeModule == nullptr) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); - - // Get or insert Global declarations for - // - initialization - // - cleanup - // - request a tensor - DECLARE(llvm_hpvm_initTensorRt); - DECLARE(llvm_hpvm_cleanupTensorRt); - // DECLARE(hpvm_request_tensor); - - // Find hpvm.init and visc.cleanup calls, and add placeholder methods - // for initialization and cleanup of the hpvm tensor runtime - - Function *VI = M.getFunction("llvm.hpvm.init"); - assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n"); - InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create( - llvm_hpvm_initTensorRt, - ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); - - Function *VC = M.getFunction("llvm.hpvm.cleanup"); - assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n"); - CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "", - CleanupCall); -} - -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFInternalNode *N) { - errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs() << "Skipping internal node\n"; -} - -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode *N) { - - // Skip if it is a dummy node - if (N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort if it is an allocation node - if (N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - // Search for intrinsic only if it has the right hint - if (!checkPreferredTarget(N, hpvm::CPU_TARGET)) { - errs() << "Skipping node: 
" << N->getFuncPointer()->getName() << "\n"; - return; - } - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); - errs() << "function name = " << F->getName() << "\n"; - - std::vector<IntrinsicInst *> IItoRemove; - - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - if (BuildDFG::isHPVMIntrinsic(I)) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); - assert( - (II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") && - "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - /********************* Handle VISC Tensor intrinsics ********************/ - // We replace them with calls to functions with implementations at the - // LLVM level - switch (II->getIntrinsicID()) { - - case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.convolution - */ - DEBUG(errs() << F->getName() << "\t: Handling tensor convolution \n"); - - // Argument list for the runtime call - std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - - Constant *conv_mode = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - Constant *conv_precision = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - - Args.push_back(conv_mode); - Args.push_back(conv_precision); - - // Create function call - FunctionCallee tensorConvolutionCPU; - DECLARE(tensorConvolutionCPU); - - CallInst *CI = CallInst::Create(tensorConvolutionCPU, Args, "", II); - // We can replace the call to hpvm.tensor.mul with the LLVM call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */ - DEBUG(errs() << F->getName() << "\t: Handling tensor mul\n"); - - // Argument list for the runtime call - 
std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - - // Create function call - FunctionCallee tensorGemmCPU; - DECLARE(tensorGemmCPU); - - CallInst *CI = CallInst::Create(tensorGemmCPU, Args, "", II); - // We can replace the call to hpvm.tensor.mul with the LLVM call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */ - DEBUG(errs() << F->getName() << "\t: Handling tensor add\n"); - // Tensor add(a,b) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - - // FIXME: remove this comment - must check for in-place - // assert(inplace && - // "Operand not valid for in place operation. Code gen - // aborted.\n"); - - // Argument list for the runtime call - std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - - // Create function call - FunctionCallee tensorAddCPU; - DECLARE(tensorAddCPU); - CallInst::Create(tensorAddCPU, Args, "", II); - // We can replace the call to hpvm.tensor.add with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - case Intrinsic::hpvm_tensor_pool_max: - case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */ - DEBUG(errs() << F->getName() << "\t: Handling tensor_pool_max\n"); - // Tensor relu(a) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. 
- bool inplace = isValidOperandForInPlaceOperation(Op, F, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list - tensorPooling(input, poolFunction, window_height, - // window_width, vertical_pad, horizontal_pad, - // vertical_stride, horizontal_stride); - std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - - int pool_type = 0; - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { - pool_type = 0; - } - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { - pool_type = 1; - } - - Constant *constPoolType = - ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); - Args.push_back(constPoolType); // ID for max pool. Min/Avg have - // different IDs (non-zero) - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - Args.push_back(II->getOperand(6)); - - // Create function call - FunctionCallee tensorPoolingCPU; - DECLARE(tensorPoolingCPU); - CallInst *CI = CallInst::Create(tensorPoolingCPU, Args, "", II); - - // Replacing intrinsic result uses with the result of the LLVM call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - case Intrinsic::hpvm_tensor_relu: - case Intrinsic::hpvm_tensor_clipped_relu: - case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */ - DEBUG(errs() << F->getName() - << "\t: Handling tensor activation functions \n"); - // Tensor relu(a) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. 
- bool inplace = isValidOperandForInPlaceOperation(Op, F, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list for the runtime call - std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { - // Create function call - FunctionCallee tensorReluCPU; - DECLARE(tensorReluCPU); - CallInst::Create(tensorReluCPU, Args, "", II); - } else if (II->getIntrinsicID() == - Intrinsic::hpvm_tensor_clipped_relu) { - // Create function call - //-- FunctionCallee tensorClippedRelu; - FunctionCallee tensorRelu2CPU; - DECLARE(tensorRelu2CPU); - CallInst::Create(tensorRelu2CPU, Args, "", II); - } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { - // Create function call - FunctionCallee tensorTanhCPU; - errs() << "tensorTanh Call = \n\n"; - DECLARE(tensorTanhCPU); - // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; - CallInst::Create(tensorTanhCPU, Args, "", II); - } - - // We can replace the call to hpvm.tensor.relu with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */ - DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); - // Tensor relu(a) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - assert(inplace && - "Operand not valid for in place operation. 
Code gen aborted.\n"); - - // Argument list for the runtime call - std::vector<Value *> Args; - Args.push_back(II->getOperand(0)); - - // Create function call - FunctionCallee tensorSoftmaxCPU; - DECLARE(tensorSoftmaxCPU); - CallInst::Create(tensorSoftmaxCPU, Args, "", II); - // We can replace the call to hpvm.tensor.softmax with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } break; - - default: - llvm_unreachable("Unknown VISC Intrinsic!"); - break; - } - } - } - - // We need to do this explicitly: DCE pass may not remove them. - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around. - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); - ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - errs() << "Erasing: " << **ri << "\n"; - (*ri)->eraseFromParent(); - } - - return; -} - -bool DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // Get the In Place Analysis Results - InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); - // Print results - printInPlaceDFGParameter(IPP); - - std::vector<DFInternalNode *> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - CGT_ReplaceApproxHPVMIntrinsicsWithFCalls *CGTVisitor = - new CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(M, DFG, IPP); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode : Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - // TODO: Edit module epilogue to remove the VISC intrinsic 
declarations - delete CGTVisitor; - - return true; -} - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -} // End of namespace - -char DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::ID = 0; -static RegisterPass<DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls> X("replace-intrinsics", - "Replace ApproxHPVM intrinsics with LLVM calls", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.exports b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.exports deleted file mode 100644 index 139597f9cb07c5d48bed18984ec4747f4b4f3438..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.exports +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt index 2feeaa2fefeb5b1a7dd937816e785a1dd641a5e4..d28868892f6d45e6905594e143a13aa83b1db9d6 100644 --- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -78,7 +78,7 @@ set( RUNTIME_SRCS_FILENAME approx_simulation.cu group_conv.cu - approx_techniques2.cu + approx_techniques.cu common.cpp configuration.cpp debug.cc @@ -178,158 +178,72 @@ add_dependencies(tensor_runtime_online tensor_runtime) target_link_libraries(tensor_runtime_online ${LINK_LIBS}) target_compile_definitions(tensor_runtime_online PRIVATE -DONLINE_PROFILING=true -DFP16_tuning=false) -# Adding new rule for building a cuDNN runtime library -#-- find_package(OpenMP REQUIRED) -#-- cuda_add_library(tensor_cpu_runtime tensor_runtime/src/tensor_cpu_runtime.cc) -#-- target_compile_options(tensor_cpu_runtime PRIVATE ${OpenMP_CXX_FLAGS}) -#-- target_link_libraries(tensor_cpu_runtime PRIVATE ${OpenMP_CXX_FLAGS}) -### TODO: Remove unsued CMake rules after 
careful consideration +# -------------- Unit Test Source ---------------- -# Adding rule for the debugging source add_executable(unit_tests dnn_sources/src/unit_tests.cc) target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -#**************** FP32 Source Builds *********** - -add_executable(lenet_mnist dnn_sources/src/lenet_mnist.cc) -target_link_libraries(lenet_mnist tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet_cifar10 dnn_sources/src/alexnet_cifar10.cc) -target_link_libraries(alexnet_cifar10 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet2_cifar10 dnn_sources/src/alexnet2_cifar10.cc) -target_link_libraries(alexnet2_cifar10 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar10 dnn_sources/src/vgg16_cifar10.cc) -target_link_libraries(vgg16_cifar10 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet18_cifar10 dnn_sources/src/resnet18_cifar10.cc) -target_link_libraries(resnet18_cifar10 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar100 dnn_sources/src/vgg16_cifar100.cc) -target_link_libraries(vgg16_cifar100 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet dnn_sources/src/mobilenet.cc) -target_link_libraries(mobilenet tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_shallow dnn_sources/src/mobilenet_shallow.cc) -target_link_libraries(mobilenet_shallow tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet50_imagenet dnn_sources/src/resnet50_imagenet.cc) -target_link_libraries(resnet50_imagenet tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - - - - -#********* FP16 Source Builds ****** - -add_executable(lenet_half dnn_sources/src/half/lenet_mnist_half.cc) 
-target_link_libraries(lenet_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet_half dnn_sources/src/half/alexnet_cifar10_half.cc) -target_link_libraries(alexnet_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet2_half dnn_sources/src/half/alexnet2_cifar10_half.cc) -target_link_libraries(alexnet2_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet18_half dnn_sources/src/half/resnet18_cifar10_half.cc) -target_link_libraries(resnet18_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar10_half dnn_sources/src/half/vgg16_cifar10_half.cc) -target_link_libraries(vgg16_cifar10_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar100_half dnn_sources/src/half/vgg16_cifar100_half.cc) -target_link_libraries(vgg16_cifar100_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_half dnn_sources/src/half/mobilenet_half.cc) -target_link_libraries(mobilenet_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_shallow_half dnn_sources/src/half/mobilenet_shallow_half.cc) -target_link_libraries(mobilenet_shallow_half tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - - - - -#********* Promise API sources - Used with the Autouner - -add_executable(lenet_promise dnn_sources/src/promise/lenet_promise.cc) -target_link_libraries(lenet_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - - -add_executable(alexnet_promise dnn_sources/src/promise/alexnet_promise.cc) -target_link_libraries(alexnet_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet2_promise dnn_sources/src/promise/alexnet2_promise.cc) -target_link_libraries(alexnet2_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - 
-add_executable(resnet18_promise dnn_sources/src/promise/resnet18_promise.cc) -target_link_libraries(resnet18_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar100_promise dnn_sources/src/promise/vgg16_cifar100_promise.cc) -target_link_libraries(vgg16_cifar100_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar10_promise dnn_sources/src/promise/vgg16_cifar10_promise.cc) -target_link_libraries(vgg16_cifar10_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_promise dnn_sources/src/promise/mobilenet_promise.cc) -target_link_libraries(mobilenet_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_shallow_promise dnn_sources/src/promise/mobilenet_shallow_promise.cc) -target_link_libraries(mobilenet_shallow_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - +#**************** FP32 TensorRT Source Builds *********** -add_executable(vgg16_imagenet_promise dnn_sources/src/promise/vgg16_imagenet_promise.cc) -target_link_libraries(vgg16_imagenet_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc) +target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(resnet50_imagenet_promise dnn_sources/src/promise/resnet50_imagenet_promise.cc) -target_link_libraries(resnet50_imagenet_promise tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc) +target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc) +target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) 
+add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc) +target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc) +target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc) +target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc) +target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -# OpenTuner Piped Sources -add_executable(alexnet_piped dnn_sources/src/promise/alexnet_piped.cc) -target_link_libraries(alexnet_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc) +target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(alexnet2_piped dnn_sources/src/promise/alexnet2_piped.cc) -target_link_libraries(alexnet2_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc) +target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(lenet_piped dnn_sources/src/promise/lenet_piped.cc) -target_link_libraries(lenet_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc) +target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(resnet18_piped dnn_sources/src/promise/resnet18_piped.cc) -target_link_libraries(resnet18_piped tensor_runtime_online ${GPU_PROFILER_LIB} 
${SOC_SIMULATOR_LIB}) -add_executable(vgg16_cifar10_piped dnn_sources/src/promise/vgg16_cifar10_piped.cc) -target_link_libraries(vgg16_cifar10_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(vgg16_cifar100_piped dnn_sources/src/promise/vgg16_cifar100_piped.cc) -target_link_libraries(vgg16_cifar100_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(mobilenet_piped dnn_sources/src/promise/mobilenet_piped.cc) -target_link_libraries(mobilenet_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +#********* FP16 TensorRT Source Builds ****** -add_executable(mobilenet_shallow_piped dnn_sources/src/promise/mobilenet_shallow_piped.cc) -target_link_libraries(mobilenet_shallow_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc) +target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(vgg16_imagenet_piped dnn_sources/src/promise/vgg16_imagenet_piped.cc) -target_link_libraries(vgg16_imagenet_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc) +target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(resnet50_imagenet_piped dnn_sources/src/promise/resnet50_imagenet_piped.cc) -target_link_libraries(resnet50_imagenet_piped tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc) +target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc) +target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) 
+add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc) +target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) +add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc) +target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -#### Image Processing Benchmarks +add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc) +target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) -add_executable(alexnet2_canny dnn_sources/src/alexnet2_canny.cc) -target_link_libraries(alexnet2_canny tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists_cpu.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists_cpu.txt deleted file mode 100644 index cff0129c2aa02b9776ed7bba8e92029d2c2560e8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/CMakeLists_cpu.txt +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required (VERSION 2.6) -project (approxhpvm-tensorRt-cpu) - - -# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 " ) - - -# Adding new rule for building a cuDNN runtime library -add_library(tensor_cpu_runtime tensor_runtime/src/tensor_cpu_runtime.cc) -target_link_libraries(tensor_cpu_runtime) - - -#**** CPU sources -add_executable(fc2_cpu dnn_sources/src/fc2_cpu.cc) -target_link_libraries(fc2_cpu tensor_cpu_runtime) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/knobs.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/knobs.txt deleted file mode 100644 index 1be644441769e8544901010586bc9842d8b14289..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/knobs.txt +++ /dev/null @@ -1,28 +0,0 @@ -perf_fp16,120,Baseline -perf_fp16,151,Col perf 50% offset=0 
-perf_fp16,152,Col perf 50% offset=1 -perf_fp16,153,Row perf 50% offset=0 -perf_fp16,154,Row perf 50% offset=1 -perf_fp16,155,Col perf 33% offset=0 -perf_fp16,156,Col perf 33% offset=1 -perf_fp16,157,Col perf 33% offset=2 -perf_fp16,158,Row perf 33% offset=0 -perf_fp16,159,Row perf 33% offset=1 -perf_fp16,160,Row perf 33% offset=2 -perf_fp16,161,Col perf 25% offset=0 -perf_fp16,162,Col perf 25% offset=1 -perf_fp16,163,Col perf 25% offset=2 -perf_fp16,164,Col perf 25% offset=3 -perf_fp16,165,Row perf 25% offset=0 -perf_fp16,166,Row perf 25% offset=1 -perf_fp16,167,Row perf 25% offset=2 -perf_fp16,168,Row perf 25% offset=3 -samp_fp16,261,Samp 50% offset=0 -samp_fp16,262,Samp 50% offset=1 -samp_fp16,263,Samp 33% offset=0 -samp_fp16,264,Samp 33% offset=1 -samp_fp16,265,Samp 33% offset=2 -samp_fp16,266,Samp 25% offset=0 -samp_fp16,267,Samp 25% offset=1 -samp_fp16,268,Samp 25% offset=2 -samp_fp16,269,Samp 25% offset=3 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_120.txt deleted file mode 100644 index f3e1be03b607bcf404a6cb809f1e231d497b19c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,65.2289,635838 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.206437,13534.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.171279,13528.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,257.082,2.53801e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220024,13871.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.191798,13871.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.78132,31392.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,79.5717,772941 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208738,14141.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.179234,14145.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 
-Conv4,132.214,1.32889e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213516,14286.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.178101,14283.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.07068,28582.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,44.624,449041 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199375,14430.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.155295,14431.9 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,75.4301,795035 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.205183,14622.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.157695,14624.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.24782,14616.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.20811,15614.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.164572,14641.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.996439,15121.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_151.txt deleted file mode 100644 index 891ef4648247e6a7879f0a8c974b3b9b59193105..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,58.0027,548113 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.189544,13050.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.17103,13052 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,128.221,1.22437e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217308,13335.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.17494,13336.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.54381,29480.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,48.9069,459648 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213516,13612.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.19278,13600.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,66.2285,651716 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.205122,13690.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.174924,13694.9 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.83244,27400 -Pool2_f2h,0,0 -Pool2_h2f,0,0 
-Conv5,27.4047,267152 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.190466,13844.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.149922,13851.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,39.169,396057 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.186793,13947 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.147929,13947.8 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.12431,14343.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.09979,14347.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.145526,13961.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.939092,13969.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_152.txt deleted file mode 100644 index 0e1dc661467bd46ffab34faf5f85ee1254068b7c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,57.9254,550132 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.188185,13098.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.168668,13100.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,128,1.22987e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217096,13371.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.17532,13354.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.45412,26727.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,48.745,462457 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.20812,13623.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.176805,13623.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,66.1231,650723 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.204978,13775.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.185167,13766.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.89592,25380.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,27.2569,267804 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.187973,13877.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.146757,13869.7 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,39.3148,398486 -Conv6_f2h,0,0 -Conv6_h2f,0,0 
-Add6,0.18485,13995 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.147977,14000.1 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.16044,13997.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.14272,14765.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.154716,14008.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.967361,14016.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_153.txt deleted file mode 100644 index 211011c1c8c194c7c81ec30abe02b85d03de8f95..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,52.3167,486159 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.200114,12880.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.175215,12886.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,122.54,1.15844e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212952,13161.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.179759,13157.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.5318,27617.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,47.8415,444086 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203436,13433.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.170578,13433.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,65.082,630988 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213602,12821.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.166886,12826.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.80028,25660.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,27.811,255106 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.186975,13659.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.158729,13644.4 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,39.012,391835 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.183039,13762.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.147983,13761.3 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.16698,13761.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.16997,14528.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 
-Add7,0.16397,13782.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.946711,13787 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_154.txt deleted file mode 100644 index 4e4718b997d2acdad6386541c328fd778daf9f92..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,52.9971,496465 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.192665,12930.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.170856,12926.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,122.764,1.15342e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.215928,13205.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.175704,13204.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.49985,26407.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,47.9188,445564 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.204233,13461.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.177759,13459.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,64.6805,628375 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210421,13606 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.173551,13591.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.87541,26494.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,27.3168,264238 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.190476,13723.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.150409,13732.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,38.9734,395354 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.185871,13830.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.147122,13835.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.13866,13834.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.1186,14601.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.15205,13856.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.938171,13864.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_155.txt deleted file mode 100644 index 394d24cf90f4313068db4ecf05cdc388d4178799..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,64.8669,622301 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.20949,13239.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.176294,13243.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,162.006,1.5735e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220239,13511.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.178239,13511.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.75893,29646.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,57.036,545033 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208872,13815.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.181833,13813.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,84.4746,841306 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216168,13970.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.181836,13976.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.06704,27968.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,32.4425,322697 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199647,14092.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160076,14084.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,48.8451,506372 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.196623,14181.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.159896,14172.9 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.22106,14178.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.21282,15029.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.16867,14202.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.963022,14204.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_156.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_156.txt deleted file mode 100644 index 03318bf3aeeadd855ab379bfcb0db83e3adb17df..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,63.1272,608633 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203714,13382.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.179644,13369.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,159.062,1.56322e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219154,13650.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176476,13654.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.6318,29500.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,58.7054,568593 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.212313,13955.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.186098,13945.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,88.1808,883262 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.211765,14119.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.179279,14113.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.87673,27502.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,31.7267,314842 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199883,14261.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.168866,14253.9 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,47.7887,497470 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.194706,14369.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.161791,14355.9 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.18783,14357 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.19429,15205.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.174949,14370.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,1.00356,15533.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_157.txt deleted file mode 100644 index 
492feb3022d61db674a49dde67e8f1c98f5c95dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,63.9795,627055 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.207468,13560.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.172213,13546.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,164.125,1.62846e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21318,13828.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.174274,13824.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.56693,28880.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,58.2621,572924 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.205416,14092.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.182895,14103 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,88.3811,890106 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213208,14246.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.179445,14255.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.93471,28531.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.5703,350902 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.201976,14415.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.164367,14414.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.1299,560793 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.195474,14557 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.158092,14560 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.15914,14565.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.14588,15448.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.158146,14582.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.947485,15022.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_158.txt deleted file mode 100644 index 55b44395936071c1b5c03e233e3d2b1622aab333..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_158.txt +++ 
/dev/null @@ -1,72 +0,0 @@ -Conv1,59.0766,557171 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203615,13064.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.17004,13070.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,156.515,1.48722e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.242274,13321.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.179781,13329.7 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.60261,28731.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,55.4282,523293 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211531,13627 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.179788,13630.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,82.562,806140 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.20909,13773.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.179433,13771.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.89329,27557.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,32.1691,313581 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.201173,13897.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.15526,13902.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,48.4451,495873 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.200427,14025.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.15853,14018.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.21556,14018.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.20271,14854.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.135273,14049.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.970621,14465.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_159.txt deleted file mode 100644 index c1cd63239796ff96ce9113f5b9d0afb6a565ab5b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,59.2911,555019 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.201084,12973.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.170233,12969.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,156.931,1.49121e+06 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217797,13251.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.174917,13245.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.71397,29190.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,57.8418,542994 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21652,13546.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.178658,13552.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,87.9539,849490 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217259,13704.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.179077,13693.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.03865,27430.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,32.5739,308590 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199641,13812 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.161615,13811.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,49.129,497072 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208169,13936 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.161935,13937.9 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.28389,13935.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.25362,14764.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.163733,13937.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,1.00077,14665.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_160.txt deleted file mode 100644 index 6ee9d4cd79646b783be1434b4dcfbfedf40cb4dd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,58.8135,556846 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203772,13190.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.176348,13196.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,158.962,1.53725e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219541,13440.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.178588,13431.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.68237,28875.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,56.3609,533809 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210882,13724.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.176328,13726.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,86.1982,845559 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214652,13882.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.173688,13888.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.96952,27069.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.5895,340452 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.200648,14023.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.159231,14025 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,52.939,546006 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.197173,14157.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.150853,14157.7 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.16124,14167.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.17195,15013 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.157372,14168.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.983837,14180.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_161.txt deleted file mode 100644 index 3b49fae064a89d59a97f8584aa73d7c48173c14b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,65.8243,634762 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.205215,13383 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.179957,13375.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,174.316,1.70456e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219689,13640.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.182165,13619.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.68031,29935.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,61.4199,594851 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215746,13926 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.185436,13916.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,93.149,925082 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213054,14081 
-Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.180697,14075.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.99828,28166.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,35.2346,352384 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.204687,14205.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.157567,14209.1 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.4125,557191 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20316,14374.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.159049,14361.3 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.19671,14363.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.18429,15228.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.17158,14372.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.963239,14813.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_162.txt deleted file mode 100644 index e1a046ef01a51fd9d03c0f4447a47569b9f65caa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,66.1421,634983 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.21741,13337.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.185688,13331.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,176.119,1.69722e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.227336,13574 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.188882,13570.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.6811,30306.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,61.7431,583447 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.219186,13881.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.181445,13877.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,93.914,932514 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.219256,14007.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.1794,13997.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.93367,28014.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,35.3477,350762 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208718,14161.3 
-Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.161401,14163.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.9521,560676 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.204236,14322.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.157525,14318.9 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.23967,14322.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.22098,15186.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.169871,14326.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.978561,14755.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_163.txt deleted file mode 100644 index ad8ef2563416b83c6a2661c5d7ec30f90cb37926..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,66.2346,634503 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.209573,13306.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.174533,13306.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,176.314,1.70236e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.234539,13560.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.184031,13556.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.71175,30262.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,61.7066,582837 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.221103,13124.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.177237,13110.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,94.1784,928952 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.219455,13997.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.18468,13999.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.97442,28016.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,35.4772,352075 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.209503,14157.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.161119,14161.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,54.1191,562078 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.207996,14315.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.160258,14309.8 
-Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.23714,14309.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.21279,15183.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.175522,14321.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.977623,14744.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_164.txt deleted file mode 100644 index 3461775f35e069cfe4f597e2a0ec685e4a257c0d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,65.9398,633363 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.205851,13291 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.172959,13287.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,174.804,1.69553e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220789,13560.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.177087,13564.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.81027,31205.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,61.5282,590104 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209931,13849.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.18996,13845.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,93.4214,924284 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21342,13978.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.184242,13988.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.0622,27996.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.9337,342861 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.202178,14110.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160732,14112.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.8579,556438 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.199122,14251.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.157836,14257.4 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.22002,14266.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.23049,15130.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.172575,14268.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,1.02197,15447.3 
-Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_165.txt deleted file mode 100644 index a78c775332f9a0b6160456e92466fca7b2294a5a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,61.5774,582474 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.210591,13093.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.16853,13084.1 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,171.588,1.6167e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.230504,13322.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176787,13316.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.79893,31381.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,60.6244,573649 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.223733,13607.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.179596,13617.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,92.717,894994 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.221378,13754.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.177762,13744.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.00882,27528.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.7619,342294 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.204523,13901.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.15829,13901.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.3066,543043 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.203224,14037 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.161542,14042.7 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.26148,14044.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.21381,14900.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.171011,14056 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.964679,14479 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_166.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_166.txt deleted file mode 100644 index 8c700396a9e51f8addd4db4ff402ff57424c7a50..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,61.7188,579843 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.205929,13080.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.170476,13046.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,170.797,1.62614e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221976,13303.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.172012,13305.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.66512,28575.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,60.6009,571260 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.212117,13600.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.198482,13602 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,92.7343,897664 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214779,13754.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.180697,13735.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.93166,27486 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,35.0139,341225 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.198821,13911.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.15717,13913.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.8398,547490 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.200568,14061.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.154255,14051.6 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.22431,14057.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.21994,14922.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.143932,14068.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.950046,14491.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_167.txt deleted file mode 100644 index 
ed26859f07eb0d4c10b16b1d8a1e1df46c2700b4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,60.3008,566150 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212953,13192.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.16919,13192.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,168.206,1.62008e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.216709,13429.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176031,13418.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.62383,28235.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,59.0124,557003 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.2075,13707.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.183634,13707.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,90.5798,881313 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214488,13884.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.174665,13865.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.87406,27761.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.5362,338970 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197125,14010.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156981,14014.1 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,53.1529,543496 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.199279,14147.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.152859,14148 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.22611,14878.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.14545,15004 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.159065,14156.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.967012,15316.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_168.txt deleted file mode 100644 index b9ea435a191c3e97278a0de963a378bf0b41a587..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_perf_fp16_168.txt +++ 
/dev/null @@ -1,72 +0,0 @@ -Conv1,60.0049,566283 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198763,13133.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.171007,13131.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,168.986,1.60868e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214533,13381.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.17733,13378.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.72244,29920.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,59.1517,556478 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209778,13645.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.183487,13653.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,91.8964,875586 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.218393,13825.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.178373,13823.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.097,27656.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,34.1944,334924 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199813,13966.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.157509,13945.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,52.856,538205 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.201836,14090.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.156297,14091.1 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.21402,14100.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.20897,14950.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.173667,14119.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.958647,14548.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_261.txt deleted file mode 100644 index 4ceca9d3b4337e38f2505ded41e53696bce2a3d7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,29.9901,283881 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.178492,13190.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.172648,13198.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,167.328,1.63105e+06 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.213493,13541.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.182031,13537.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.6555,27766.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,49.9733,474729 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.197967,13766.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.176799,13753.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,84.2239,851536 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.197804,13933.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.173186,13933.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.09551,27874.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,30.1239,303528 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.185539,14072.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.148063,14057.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,55.3267,563272 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.177714,14210.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.151116,14212.2 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.13263,14214.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.17752,15039.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.157078,14227.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.949831,14229.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_262.txt deleted file mode 100644 index 594b3c6cb4546672a49d7d15cfaef6e7cdedd2cc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,30.6343,291631 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.175893,13148.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.16885,13150.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,167.55,1.62919e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208101,13511.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.181103,13511.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.69895,31258.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,49.6537,469423 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.20214,13730.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.183407,13074.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,84.4516,851268 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.197845,13902.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.175394,13893.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.03863,27061.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,30.1611,302847 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.179497,14023.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.157608,14019.6 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,55.0979,577538 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.17278,14181.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.155212,14190.7 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.15995,14196.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.15144,15014.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.170053,14202.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.927195,14610 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_263.txt deleted file mode 100644 index daac5adf34f1af981d7170b5b9405b7bc3acda54..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,38.5323,380280 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.183311,13716.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.166876,13706.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,246.805,2.50131e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.207515,14125.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.180767,14122 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.67711,29548.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,68.6879,684023 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.201026,14390.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.175177,14388.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,124.545,1.31127e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.20485,14513 
-Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.181132,14520.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.99895,29075.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,42.71,444867 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.185577,14691.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.155711,14683.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,79.6438,863928 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.198735,14844.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.155318,14831.4 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.21923,14833 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.23007,15807.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.179961,14844.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.944167,15336.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_264.txt deleted file mode 100644 index 8e9d08eb04228615230676bcf297745e1309040b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,37.9249,377238 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.183003,13840.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.172521,13842.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,244.866,2.50419e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210677,14226.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.189071,14222.7 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.59858,28454.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,68.3239,682954 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.198991,14506.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.180629,14515.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,123.521,1.30914e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.203701,14661.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.181269,14656 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.89143,29323.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,42.2852,445845 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.187503,14802.8 
-Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.154306,14798.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,79.0359,862979 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.188383,14951.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.158735,14940.1 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.20675,15447.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.07758,15450.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.140345,14971.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.979124,15459.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_265.txt deleted file mode 100644 index a9a49265efa868fba12d672e22553fac1245d9fa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,38.6073,381890 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.183922,13657.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.167353,13655.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,247.161,2.49436e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212076,14055.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.184153,14062.7 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.74619,30741.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,68.3499,680826 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.202505,14325.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.180396,14329.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,124.93,1.28676e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.204786,13799.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.175125,13782.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.03242,28261.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,42.9068,448281 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.191074,14629 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.153951,14617.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,79.7328,862210 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.191826,14783.3 -Add6_f2h,0,0 -Add6_h2f,0,0 
-Tanh6,0.156604,14787.1 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.19046,14789 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.23317,15763.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.1715,14808 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.950132,15288.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_266.txt deleted file mode 100644 index 17d596384117b149418ce2f2e7588b660aad2328..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,34.6388,330159 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.1851,13519.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.163324,13525.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,211.763,2.10457e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208022,13853 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.172867,13851.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.61003,28957 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,59.4769,581575 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.20364,14117.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.186194,14113.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,107.233,1.09327e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.203724,14290.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.183164,14294.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,1.95363,28612.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,39.2333,403131 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.194492,14459.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.154262,14446.4 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,72.6672,778814 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.191938,14625.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.155673,14618.1 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.1725,14620.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.17531,15526 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.168101,14628 -Add7_f2h,0,0 -Add7_h2f,0,0 
-Softmax1,0.942702,15085.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_267.txt deleted file mode 100644 index 2a49166de84cb5d607169c5703c2acd1173b2d56..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,35.3669,338192 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.189141,13532.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.176591,13521 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,211.87,2.10409e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21045,13851.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.180578,13849.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.73284,29157.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,58.9372,579132 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203445,14140.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.1863,14151.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,107.227,1.0916e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.207173,14277.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.177253,14279.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.05931,28583.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,39.4378,402404 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197631,14460 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.161132,14444.7 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,72.8899,772755 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.191496,14641.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.155829,14628.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.16222,14634.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.18754,15547.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.165084,14642.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.931806,15103.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_268.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_268.txt deleted file mode 100644 index 7ffc46b3fa02f3f4cd25aeb7fe2bfece3a6d13f0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,35.1044,343285 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.185285,13540.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.165138,13536.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,211.784,2.10554e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.216373,13853.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.180239,13853.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.7821,29895.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,58.7825,573791 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211295,14119.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.17844,14119.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,107.332,1.09268e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.208469,14294.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.177596,14296.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.08082,28605.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,39.3551,404975 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.200089,14439.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156082,14434 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,72.6197,776642 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.186354,14622.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.155119,14630.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.16887,14638.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.19008,15550.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.165605,14646.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.942276,14654 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_269.txt deleted file mode 100644 index 
dc9228cefaba6b9fa024a854fbd24b1228b144c8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,72 +0,0 @@ -Conv1,35.9637,343026 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.185477,13412.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.169158,13406.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,213.539,2.10433e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.213954,13732.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.185995,13738.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,2.77092,30824.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,59.257,574879 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203871,14027.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.178715,14037.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,108.244,1.09504e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.211721,14189.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.178908,14195.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,2.0528,28416.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,39.7267,403494 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.193717,14337 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160898,14335 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,73.0412,773266 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.192258,14537.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.158802,14528 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,1.22562,14522.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.34877,16977.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.179394,14555.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.978526,15010.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp32_perf_fp32_120.txt deleted file mode 100644 index 46c04e6d4c91ff5a1d4f0c98028c6c76a1710a81..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet2_cifar10/alexnet2_cifar10_fp32_perf_fp32_120.txt +++ 
/dev/null @@ -1,72 +0,0 @@ -Conv1,110.867,937287 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.2524,11951.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.148963,11939.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,405.61,3.38379e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.52405,14207.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.314892,12284.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,7.49997,70147.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,146.869,1.24647e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.22959,12498.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.167686,12501 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,242.23,2.12642e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212758,12732 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.139776,12735.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,4.07526,42078.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,70.2196,641128 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.201587,12969.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.157597,12973.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,121.098,1.12443e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208646,13218.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,0.140093,13224.5 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,2.33548,27133.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,0.665967,13777.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.132666,13251.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,0.926642,14597.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_120.txt deleted file mode 100644 index a9c2b75d063b189adac655c77df93f5dc29b5aa7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,361.205,887540 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.8371,25251 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.89826,25346 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1282.07,4.13212e+06 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.66204,37762 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.98363,37781 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,17.6103,90940 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,420.625,1.73415e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.85413,41830 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.71096,41925 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1434.84,5.80943e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.26222,43549 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.87823,43588 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,14.8755,93369 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,230.687,1.08731e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.81058,45573 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.70434,45611 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,386.122,1.88489e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.91599,48344 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.88405,48344 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,10.1237,78290 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.99616,67944 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.04792,48554 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.67766,66936 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.80403,48440 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.95634,48440 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,350.798,1.77138e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,68.4447,356768 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.0155,146006 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,773.778,3.92759e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,447.052,2.23187e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,422.442,2.2019e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,56.5507,316578 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3619,182871 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.7478,75963 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,44.8441,253942 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_151.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_151.txt deleted file mode 100644 index 0a3f9b79c853297a54edee22f7e34296a3025c2b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,341.714,1.24364e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.01032,35383 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.81735,35419 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,672.798,2.67555e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64978,40633 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07404,40741 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.7876,81823 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,246.262,1.05848e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.36662,43206 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.02646,43238 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,344.014,1.55449e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.53103,45471 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.16501,45487 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,11.9697,72632 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,155.126,744177 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.07947,46898 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.29868,46819 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,222.859,1.1246e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.1875,48225 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.35925,48221 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,10.3547,86737 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.5923,58118 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.55215,48415 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.25993,58146 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.24436,48244 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.13347,48148 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.153,1.78208e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.6011,365656 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.3887,149498 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,380.734,1.91727e+06 -Conv7_f2h,0,0 
-Conv7_h2f,0,0 -Conv8,258.012,1.30302e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,254.065,1.31266e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,47.5991,258579 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,28.0997,160667 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,8.53453,58494 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,41.037,223342 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_152.txt deleted file mode 100644 index 0e370a50e6ec6468276d597bed6382a367d34133..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,338.653,1.22516e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.49818,35363 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.7734,35383 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,673.747,2.65805e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.72712,40710 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.994615,40797 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,16.2806,81620 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,241.163,1.03265e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.17343,43160 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.854522,43232 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,342.155,1.50794e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.54565,45654 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.22057,45727 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,11.3366,69365 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,150.324,720358 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.47724,46811 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.32594,46888 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,219.824,1.09715e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.71662,48366 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.02462,48404 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,10.3584,78167 
-Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.2894,61905 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.67705,48499 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.31139,63276 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.16947,48345 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.93852,48117 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,364.322,1.77585e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.1004,353948 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.256,151166 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,370.56,1.8226e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,255.449,1.25407e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,251.415,1.29241e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,48.623,261943 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.175,170000 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,8.85507,63150 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,41.1791,231371 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_153.txt deleted file mode 100644 index 2f4fe3fc94397ef75b2b4543fcd72d62a251c539..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,313.584,1.14369e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.9961,35442 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.75755,35461 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,648.569,2.56959e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.02472,40420 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.16678,40466 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.6438,85388 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,240.467,1.02632e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.51647,42923 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.924186,43115 -Tanh3_f2h,0,0 
-Tanh3_h2f,0,0 -Conv4,339.393,1.52091e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.51628,45120 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.13145,45079 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,13.4161,90533 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,149.177,687919 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.61439,46285 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.16799,46401 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,221.957,1.11058e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.56843,47735 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.70929,47772 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,10.9723,90939 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.98131,51771 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.62773,47906 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,11.0935,75974 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.54774,47622 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.20792,47603 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,403.556,2.01366e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,97.3704,495605 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.8325,213694 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,557.298,2.7772e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,362.925,1.7864e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,367.019,1.81001e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,70.1719,382270 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.0814,234379 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,15.901,102022 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,60.4126,330986 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_154.txt deleted file mode 100644 index 765c4ab9e3829e9793ce7f48b07f997cdbec7f75..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,315.379,1.16143e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.73517,35843 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.22104,35862 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,651.303,2.5903e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.74155,40626 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.729368,40639 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.1605,81426 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,236.06,1.00358e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.20156,43136 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.795643,43155 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,337.277,1.50712e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.64914,45323 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.0735,45339 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,12.5338,71911 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,152.238,733302 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.40817,50053 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.34367,46552 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,218.448,1.07146e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.36034,48079 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.2093,48098 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.3163,91399 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.68467,57846 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.90696,48171 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.91817,71589 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.82864,48004 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.80968,47907 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,435.426,2.16466e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,108.727,553915 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,41.8594,233214 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,616.852,3.0909e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,401.057,1.9871e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,409.951,2.06242e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,78.7003,426478 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,45.3634,257888 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,16.0649,99414 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,65.2346,361740 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_155.txt deleted file mode 100644 index 495b8e72ce969009a8a1098cffb1b025ff011728..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,367.586,1.37544e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.70513,36722 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.05311,36739 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,835.213,3.43808e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.499,42786 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.972346,42819 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.9356,85825 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,288.55,1.29369e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.40882,45555 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.997081,45555 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,428.832,2.03842e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.68434,48161 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.6868,48192 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,14.4762,96820 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,188.781,981227 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.77355,49647 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.3157,49647 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,292.4,1.57032e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.81982,51286 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.37624,51324 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.2112,102762 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,10.4728,81945 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.74428,51381 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.82457,61741 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.3466,51039 -ArgMax1_f2h,0,0 
-ArgMax1_h2f,0,0 -Select1,5.82338,51020 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,341.701,1.83633e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,65.475,355486 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,27.9864,148305 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,459.432,2.42759e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,300.123,1.56346e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,297.714,1.56658e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,48.3095,264674 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.424,173204 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.1637,86228 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,44.1133,252868 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_156.txt deleted file mode 100644 index f41af92f3cfc1a6067663065f53738c15b0bf3f0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,365.82,1.38229e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.86135,37012 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.2317,37012 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,835.167,3.46916e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.72744,43081 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07906,43094 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,16.1615,86240 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,300.757,1.32601e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.3894,45882 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.988184,45920 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,453.354,2.17372e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.96965,48616 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.89493,48627 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,16.5878,101588 -Pool2_f2h,0,0 -Pool2_h2f,0,0 
-Conv5,194.167,1.02167e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,4.44685,53990 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.81678,50087 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,282.513,1.52215e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.19722,51663 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.4803,51701 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,10.7573,82207 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.65599,66535 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.10187,51739 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.84134,62346 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.02711,51723 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.69407,51495 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,343.65,1.85844e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,66.7983,358767 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.0086,154621 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,466.629,2.489e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,304.943,1.59106e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,302.615,1.58389e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,49.4705,287963 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,30.9342,189526 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.8809,87008 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.3725,260890 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_157.txt deleted file mode 100644 index 46cb2c584851b98944162e561e68c378332f13db..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,370.217,1.40762e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.22554,37220 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.92616,37249 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 
-Conv2,856.722,3.56485e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.6708,43476 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.46693,43508 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,16.5105,91535 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,305.956,1.41095e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.53477,46248 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.39539,46204 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,455.469,2.22189e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.85246,49098 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.63118,49153 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,17.2593,107081 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,206.479,1.08538e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.75124,50449 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.87596,50449 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,302.047,1.65556e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.65649,52282 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.9676,52301 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.8465,99367 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.95874,72873 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.35496,52436 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.91958,57749 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.3887,52113 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.9476,52113 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,345.747,1.89194e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,67.0786,367483 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.309,155718 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,481.635,2.59297e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,314.62,1.67524e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,314.968,1.68945e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,51.4902,300879 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.5453,196186 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.0044,82161 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,44.6534,251589 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_158.txt deleted file mode 100644 index ef02807f8f589787ebf2adc839e9410c16414b53..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,345.457,1.28233e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.68836,35937 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.07205,35975 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,811.279,3.27855e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.72946,41898 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.27244,41949 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,16.0697,88412 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,285.874,1.26548e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.96683,44618 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.09865,44676 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,425.288,1.99292e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.96677,47315 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.17589,47370 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,15.4633,99614 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,186.796,953798 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.48859,48565 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.17202,48603 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,287.804,1.50853e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.65,50170 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.2331,50189 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.7266,89977 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.34201,65748 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.36465,50189 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,8.66339,64282 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.09815,49960 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.89225,49960 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,361.102,1.888e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.1473,377537 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 
-tensorMap11,29.202,154986 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,495.785,2.58001e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,320.547,1.64075e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,320.266,1.66296e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,51.7552,286193 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,30.4092,175602 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1634,75129 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.3624,254708 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_159.txt deleted file mode 100644 index a1dfc26c51f2c81cdbe8af9f3aecc47b1280e1e1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,339.094,1.26048e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.96416,36128 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.77992,36163 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,813.76,3.31365e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64994,42067 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07708,42064 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.8503,84343 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,293.589,1.31011e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.3999,44879 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.05596,44834 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,448.595,2.107e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.3315,47603 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.04386,47621 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,14.6064,94214 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,193.015,1.00842e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.50711,48920 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.48661,48920 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,289.684,1.54004e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 
-Add6,2.68606,50297 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.25317,50316 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.0975,100976 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.96732,70276 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.2283,50564 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.2103,55553 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.29655,50335 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.29321,50258 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.462,1.85714e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,71.227,389308 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.2131,161340 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,501.805,2.62186e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,324.086,1.66061e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,324.179,1.6786e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,52.8531,301753 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.8956,190928 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.7146,90426 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,48.6169,275573 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_160.txt deleted file mode 100644 index 71ac47720756a629f3e14d55cc444a87230c5ba9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,344.736,1.29476e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.74231,36242 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.35918,36299 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,835.655,3.42441e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.09224,42331 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.70002,42246 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,24.1042,124515 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,305.48,1.39635e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.33278,45027 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.73698,45038 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,448.089,2.15419e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.05134,47781 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.79851,47789 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,17.4017,110314 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,201.163,1.05746e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.5874,49107 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.32724,49126 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,298.623,1.58674e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.25227,50638 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.11102,50752 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.3346,91373 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.4071,60007 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.79112,50828 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.504,60122 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.05558,50600 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.80988,50603 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,352.716,1.87049e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.9986,377044 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.693,162274 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,502.505,2.64698e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,317.991,1.60963e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,319.62,1.6465e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,48.7611,272981 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3448,186827 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.0792,81356 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,49.0611,267293 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_161.txt deleted file mode 100644 index 
71f39ee42f050dc07c92f82ad2155db1c0cc3fdb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,378.483,1.44377e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.16813,37354 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.25211,37202 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,905.01,3.78967e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.41608,43519 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.68306,43573 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,19.4582,105246 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,331.99,1.57104e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.31531,46344 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.26824,46361 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,485.924,2.40972e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.85947,49214 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.37848,49150 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,20.9687,122884 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,197.661,1.06623e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.63787,50453 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.2229,50453 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,296.445,1.62111e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.2027,52149 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.28997,52146 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.8256,104486 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.95045,78259 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.4269,52341 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.6427,68164 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.70788,52034 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.69813,51958 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,346.748,1.8922e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,68.4736,377821 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.9813,156405 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,536.616,2.89048e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,344.142,1.82033e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,344.951,1.83101e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-tensorMap21,50.2737,285095 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,33.3015,206213 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.5197,92863 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.8158,273128 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_162.txt deleted file mode 100644 index d5b585eb367a1de62f05f02e78da08c0809625d3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,376.051,1.43644e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.85018,37259 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.19583,37275 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,903.953,3.79981e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.36888,43749 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.17659,43743 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,19.4205,113593 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,333.107,1.57337e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,3.22801,46684 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.21371,46628 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,481.782,2.40292e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.91195,49343 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.08034,49362 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,19.4,109107 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,197.676,1.04809e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.03656,50768 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.57624,50806 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,298.469,1.64777e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.01322,52469 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.26031,52393 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.8955,105111 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.20323,58051 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,3.18967,56831 -Add7_f2h,0,0 -Add7_h2f,0,0 
-Softmax1,10.956,72962 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.10368,52316 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.02162,52242 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,353.033,1.93595e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,67.0839,369030 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.8589,156817 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,529.37,2.85811e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,339.888,1.78149e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,338.989,1.80672e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,47.7108,271072 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.8336,202963 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,13.1019,88026 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.4985,278681 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_163.txt deleted file mode 100644 index 51a8fab32a0d8ff66be1c3027ba4a4cd98dae942..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,376.623,1.43041e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.2712,37335 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.29774,37259 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,906.73,3.79983e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.93634,43709 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.93493,43740 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,20.1087,113270 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,340.112,1.62416e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,3.40813,46586 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.31909,46626 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,482.654,2.42242e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.32494,49383 -Add4_f2h,0,0 -Add4_h2f,0,0 
-Tanh4,2.15611,49380 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,20.2263,123328 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,204.243,1.10377e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.71524,50639 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.19861,50639 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,292.261,1.60557e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.17489,52530 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.91954,52530 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.6709,105099 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.89142,72967 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.52353,52569 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.81087,62950 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.02868,52377 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.7711,52300 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,345.339,1.88692e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,65.0372,368276 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.1288,151188 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,514.73,2.7732e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,329.902,1.73695e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,330.174,1.75558e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,47.4751,270664 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,30.4123,187537 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.2215,83300 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.3845,273875 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_164.txt deleted file mode 100644 index 29db7c0b54da6aa7d0431c32bb46542478491427..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,379.306,1.42757e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 
-Add1,3.64497,37278 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.88347,37297 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,900.541,3.74501e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.1469,43521 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.54092,43559 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,18.5551,100862 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,329.675,1.53371e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.86776,46220 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.98069,46501 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,479.022,2.36315e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.10373,49269 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.92033,49278 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,18.7268,119116 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,204.671,1.0882e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.35297,50563 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.81918,50620 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,293.578,1.61435e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.22158,52268 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.11317,52301 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.4222,88604 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.56134,68623 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,3.0971,52511 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,8.69792,57728 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.90547,52226 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.07817,52155 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,353.537,1.93273e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,68.0307,372313 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.517,156425 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,547.356,2.93903e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,352.325,1.86501e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,352.467,1.90256e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,51.5187,301084 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,30.5184,185959 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.2667,66566 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,43.6303,263236 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_165.txt deleted file mode 100644 index 3792531eb63ede8765add69a80cfe64f2ad54ff6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,353.773,1.32322e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.4627,36510 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.8332,36567 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,878.627,3.57603e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.05928,42794 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.75336,42851 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,17.1158,93353 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,316.068,1.43626e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.90983,45593 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.22248,45720 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,475.687,2.29564e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.32846,48366 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.68287,48382 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,19.1992,110644 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,199.562,1.04256e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.51739,49706 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,3.51719,53685 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,295.002,1.59147e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.27073,51350 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.21199,51350 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,12.6138,102966 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.33603,60989 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.8852,51521 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.8473,56777 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.30992,51375 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.90882,51280 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,344.155,1.85001e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,62.0464,346209 -tensorReduce1_f2h,0,0 
-tensorReduce1_h2f,0,0 -tensorMap11,26.5284,148623 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,485.063,2.57151e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,317.001,1.59544e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,313.651,1.6023e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,45.7252,259752 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,27.9466,162611 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.16157,70558 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,40.4661,232799 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_166.txt deleted file mode 100644 index 4e7dee2df5d296223710824ae91ab705e879fc0b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,352.972,1.3171e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.25133,36683 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.45931,36702 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,885.443,3.58108e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.38741,42659 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.3844,42771 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,19.8063,110988 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,328.712,1.51501e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.41333,45436 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.27317,45524 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,477.967,2.32662e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.2292,48092 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.84405,48203 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,19.5186,111310 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,194.861,1.01432e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.30804,53532 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.18683,49610 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,296.149,1.59912e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add6,2.56193,51248 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.39044,51305 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.6294,97584 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.35232,61017 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.95976,51534 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.3614,66179 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.53209,51195 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.12908,51175 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,357.974,1.90505e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.0006,380612 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.5305,157813 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,544.245,2.8828e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,349.204,1.81432e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,346.818,1.81672e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,51.9549,306831 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.7948,203765 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,13.1344,91337 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.9918,268839 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_167.txt deleted file mode 100644 index 8f48476be1a78e1f69dc095ba7f190f736b9f4c9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,350.949,1.31074e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.10317,36453 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.50366,36453 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,878.849,3.60669e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.85388,42579 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.69125,42598 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,19.9735,106383 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,325.095,1.50842e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.37345,45411 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.45422,45408 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,472.051,2.28945e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.13227,48127 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.41281,48104 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,19.9196,120940 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,194.514,1.01048e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.62532,49456 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.35806,49396 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,296.671,1.5972e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.58171,51020 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.92363,51134 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.0122,102384 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,10.6179,76199 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.00952,51154 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.6804,66734 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.01533,50910 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.19199,50682 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,349.427,1.86051e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.2114,389789 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.1434,157984 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,550.625,2.90433e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,354.709,1.85793e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,352.117,1.8427e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,48.5219,270571 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.9688,172978 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.4235,71395 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,46.5118,258348 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_168.txt deleted file mode 100644 index 
c2573a11507e7b4f9218acc2d95ac5d9a87cf47b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,350.029,1.32438e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.79413,36356 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.08987,36432 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,876.486,3.58641e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.02325,42661 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.5181,42756 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,18.5093,101688 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,316.926,1.45832e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.64824,45388 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.77112,45462 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,470.364,2.27227e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.93378,48096 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.78824,48147 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,21.2716,134999 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,190.053,999012 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.09608,49460 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.13989,49533 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,295.297,1.59056e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.03483,51195 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.25576,51135 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.7841,97299 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,10.2158,76672 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.06478,51401 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.7958,72279 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.4984,56242 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.26824,51008 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,344.484,1.85136e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,67.526,370185 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.0443,147920 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,525.1,2.77217e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,338.872,1.77491e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,338.019,1.80635e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-tensorMap21,47.0941,254613 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,27.9775,173259 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.21542,60441 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,41.7087,227963 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_42.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_42.txt deleted file mode 100644 index e8c305acc3d9e02c2cedb41cee945de9aecf6004..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_42.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,373.814,1.50342e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.2666,39127 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.80386,39069 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1320.04,5.89533e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.02645,47995 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.67969,48105 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,19.2226,119613 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,414.702,2.13905e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.43845,51060 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.92418,50984 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,679.246,3.67719e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.11397,54118 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.50782,54156 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,20.3157,136255 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,249.499,1.46504e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.95217,55715 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.73291,55734 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,403.468,2.41794e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.74872,57540 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.27275,57578 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.5194,115175 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.8221,86147 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.35854,57635 -Add7_f2h,0,0 
-Add7_h2f,0,0 -Softmax1,11.5996,103494 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.49869,57213 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.70418,57021 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.842,2.1004e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.4989,328936 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.7613,164526 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,794.891,4.65583e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,408.553,2.31782e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,409.279,2.35626e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,50.8358,322330 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,26.4958,168790 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.8298,101643 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,51.1146,335945 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_44.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_44.txt deleted file mode 100644 index 3dfc825f8558ace8c058e5a70880f9a4b487213d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_44.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,373.04,1.49763e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.97739,38877 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.75301,38877 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1318.17,5.84816e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.5918,47981 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.52329,48175 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,17.0323,106123 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,405.754,2.06704e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.85906,50977 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.29455,51003 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,678.918,3.62642e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.84783,54143 
-Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.59723,54251 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,21.4198,151819 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,253.501,1.49005e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.88187,55678 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.6939,55620 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,412.319,2.46737e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.74961,57593 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.98414,57593 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.5037,109358 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.03222,68299 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.92216,57574 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,11.4418,97628 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.94646,57346 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.55647,57120 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.484,2.12355e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.3003,329059 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.5784,175790 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,796.238,4.63702e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,407.145,2.32402e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,409.181,2.36025e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,50.0483,326751 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,30.6403,207453 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.9928,106593 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,52.6524,329883 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_46.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_46.txt deleted file mode 100644 index 587a4bf63ff3e93ffa29ba2b34fd8ecda3453630..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_red_samp_fp16_46.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,372.942,1.50283e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.42906,39184 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.94965,39241 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1324.67,5.88102e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.11742,47981 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.79925,48021 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,18.3743,119504 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,410.906,2.10147e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.97375,51067 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.66037,51086 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,676.427,3.65834e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.06719,54117 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.34689,54269 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,20.9467,147949 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,251.52,1.48384e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.26901,55681 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.01198,55678 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,408.643,2.46041e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.6836,57634 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.81573,57558 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.2832,109364 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.36828,68324 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.89762,57691 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,11.4947,103479 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.2993,57538 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.14501,57462 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,353.979,2.13926e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,56.9928,346984 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.1811,175386 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,797.413,4.68361e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,407.74,2.34011e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,408.232,2.37399e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,49.4458,318489 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,28.7016,197598 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.5925,96118 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,48.8592,302847 -tensorMap22_f2h,0,0 
-tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_261.txt deleted file mode 100644 index 7cbbe3bf580489cc033d2b9caec9453143ca51fc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,199.096,768777 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.87226,36317 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.31064,36355 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,862.768,3.5928e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.47999,42966 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.916697,42889 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.7617,85988 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,243.939,1.10794e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.35692,45410 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.885912,45430 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,432.939,2.0384e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.30457,48006 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.967639,48083 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,12.2797,91598 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,157.17,786742 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.45318,49513 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.8852,49551 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,297.318,1.59494e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.36708,51385 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.21256,51366 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.5806,97816 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.09633,57082 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,0.937241,51595 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.82347,51595 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,2.95617,51424 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.27187,51290 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,402.601,2.16753e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,100.073,540576 
-tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.2652,213693 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,940.594,5.07676e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,529.699,2.81573e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,538.929,2.93758e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,68.0023,405177 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,40.2808,250158 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.5333,69055 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.7619,322533 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_262.txt deleted file mode 100644 index b8ce558ccafbc02f7f2875371c2c8861165c9188..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,201.491,782079 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.64602,36755 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.25925,36812 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,863.481,3.60566e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.323,43292 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.23471,43349 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.2389,86831 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,244.387,1.08745e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.66831,45676 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.88134,45676 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,434.161,2.03201e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.44482,48634 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.05884,48634 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,11.3014,82389 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,155.029,784704 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.46854,49989 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.37976,50063 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,294.808,1.5918e+06 
-Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.66305,51777 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.31285,51929 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.7435,93346 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.66416,62669 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.54901,52005 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.01051,52005 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,2.76116,46791 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.01021,51763 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,401.791,2.17637e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,101.101,554195 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.6083,214821 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,980.097,5.33162e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,567.423,3.03884e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,582.842,3.20481e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,70.175,419035 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,40.4284,263401 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,13.3167,91066 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.7532,315081 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_263.txt deleted file mode 100644 index dec9f13da9526d4483267d444aacf594f1c70259..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,239.067,970071 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.33674,38093 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.34824,38131 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1255.39,5.6059e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.37852,47241 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.771448,47298 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.7832,94710 -Pool1_f2h,0,0 
-Pool1_h2f,0,0 -Conv3,341.94,1.68639e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.16271,50082 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.818232,50099 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,635.421,3.26296e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.23858,53688 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.90917,53652 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,17.0536,113283 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,233.522,1.32983e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.39134,55217 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.274,55138 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,411.418,2.44319e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.74261,57437 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.19678,57456 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.5902,109239 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.51897,73973 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.93151,57415 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,8.79818,63113 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.12541,57053 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.56009,56904 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,370.749,2.16788e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,73.0082,435852 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,29.2538,196008 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,719.422,4.17407e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,477.161,2.70832e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,482.106,2.80035e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,53.2209,328026 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3862,203294 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.51689,73278 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,42.6197,273975 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_264.txt deleted file mode 100644 index 
2445a2271cd99145bd0e0a5e10364288a28f8987..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,239.767,971021 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.04048,38189 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.52667,38189 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1257.19,5.61616e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.40312,47337 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07093,47356 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.0031,94899 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,342.292,1.69339e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.14918,50061 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.01116,50119 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,635.865,3.30913e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.45573,53609 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.24347,53686 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,18.5907,135377 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,233.494,1.36403e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.06065,55385 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.60712,55398 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,412.356,2.47544e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.91035,57419 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.64814,57473 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.7297,115070 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.47385,68360 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.72888,57565 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,8.59251,57568 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,2.73992,57114 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.57165,57041 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,352.018,2.10195e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,69.4555,427759 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,28.3901,175566 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,762.599,4.49019e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,458.711,2.59714e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,464.862,2.68914e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-tensorMap21,51.7606,322724 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.6339,202986 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.0134,84635 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,41.902,274721 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_265.txt deleted file mode 100644 index 62d8f0cee8674e21d9597806cf296c9540c428e2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,237.945,974787 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.34541,38933 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.96817,38990 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1254.66,5.70865e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.36175,48021 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.993847,48021 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.85,96156 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,341.703,1.70559e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.12607,50768 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.871769,50863 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,631.892,3.30975e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.68056,54138 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.57061,54157 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,16.9613,119653 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,235.287,1.37796e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.30232,55876 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.31317,55818 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,409.034,2.46594e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.8821,57865 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.2812,57885 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,12.0105,115979 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.89378,75840 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.59963,57941 -Add7_f2h,0,0 -Add7_h2f,0,0 
-Softmax1,8.0217,57846 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.46285,57596 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.38688,57596 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,421.913,2.5351e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,113.737,672019 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,42.6905,276013 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1282.92,7.57748e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,783.575,4.51126e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,824.337,5.06865e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.7767,484119 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,46.8838,304945 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,13.0073,91564 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,59.7061,388204 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_266.txt deleted file mode 100644 index ecf5ecb4c08a80289cc115aad2e27074f9481b45..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,219.402,872895 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.06323,37820 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.28616,37877 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1087.48,4.72668e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.45423,45690 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.889464,45710 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,14.9285,91612 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,299.698,1.42697e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.15941,48366 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.69762,48404 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,543.633,2.71115e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.6237,51587 -Add4_f2h,0,0 -Add4_h2f,0,0 
-Tanh4,1.23599,51603 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,13.6092,87449 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,219.523,1.22238e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.38871,53306 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.32568,53268 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,385.392,2.22364e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.79116,55394 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.15909,55391 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.5473,105219 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.77498,65625 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.17118,55485 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,11.9563,82410 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.65402,55302 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.5582,55148 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,400.445,2.30964e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,99.0877,577237 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.3662,217807 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1139.33,6.49791e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,588.399,3.32596e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,603.878,3.45864e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,68.9594,418837 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.7461,261641 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.1301,94556 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.1888,354516 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_267.txt deleted file mode 100644 index 5ad85fb3de959b2b816dd24d066a50b85e831895..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,220.768,858125 -Conv1_f2h,0,0 -Conv1_h2f,0,0 
-Add1,4.07994,37021 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.91592,37059 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1088.57,4.65225e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.56389,44988 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.27442,45124 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.7948,94823 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,300.073,1.40906e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.19535,47582 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.977017,47717 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,547.289,2.68416e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.61176,50906 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.42805,50864 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,15.0744,101958 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,223.919,1.23953e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.20094,52483 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.717,52617 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,385.025,2.19024e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.48081,54763 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.45169,54795 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.8127,109809 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,10.1876,70549 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.12561,54924 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,9.80338,60375 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.58622,54682 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.99952,54435 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,342.591,1.95981e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,65.2348,378357 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,27.6085,157669 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,723.164,4.09217e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,386.548,2.12536e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,385.66,2.13438e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,47.3982,289352 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.2509,190152 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1746,81443 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,41.9383,254484 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_268.txt deleted file mode 100644 index c0755261b91bf23e68451ead7a878f72a21f1839..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,223.219,866209 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.29751,36564 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.38753,36602 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1088.38,4.59111e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.36405,44671 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.0197,44691 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.7704,89378 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,300.752,1.40425e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.19036,47394 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.865849,47429 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,546.413,2.67128e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.81794,50614 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.06284,50729 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,12.0299,96463 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,217.962,1.19927e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.12097,57432 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.48472,52097 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,386.475,2.17513e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.23227,54507 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.37096,54504 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,11.6492,109070 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,10.6493,87036 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.33381,48998 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.42382,59076 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.39623,49099 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,4.88089,49023 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,330.773,1.86958e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,51.2756,298484 -tensorReduce1_f2h,0,0 
-tensorReduce1_h2f,0,0 -tensorMap11,22.9797,157104 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,570.755,3.21619e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,311.244,1.71093e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,311.758,1.73012e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,42.3526,256955 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,23.9081,160559 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,8.93398,64312 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,33.2813,213307 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_269.txt deleted file mode 100644 index 7786d5550a860795cc7a9af0571536c8f002fb0b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,225.308,878318 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.25354,36964 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.42312,37002 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1090.22,4.64791e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.4006,44952 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.983479,45012 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,15.8368,90062 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,301.216,1.40918e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.33717,47566 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.913721,47585 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,545.781,2.68126e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.42581,50829 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.20725,50886 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,13.1032,107069 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,231.958,1.27411e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.79127,52506 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.59179,52352 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,386.436,2.09794e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add6,2.93601,54701 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.14792,54774 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,12.0976,109698 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,9.89964,81790 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.42037,54830 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,10.0702,65802 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,3.861,54641 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,5.58297,54474 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,352.343,2.00256e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,70.9341,398430 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,30.738,178279 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,801.38,4.51729e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,443.007,2.44053e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,427.524,2.36311e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,53.5074,320543 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.6672,195257 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.3089,80832 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,45.586,269939 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp32_perf_fp32_120.txt deleted file mode 100644 index f00d5f1ed2fbbe2687b34d1056b0e8f710c876c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,539.627,1.99043e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.16915,36437 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.89879,36514 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,2027.43,8.19658e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.94056,46777 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.5858,46796 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,36.2756,195821 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,950.176,4.46438e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.26696,49224 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.48219,49240 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1414.68,7.23013e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.54139,52507 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.93207,52526 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.4147,178967 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,337.686,1.93166e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.38756,54394 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.20894,54394 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,562.317,3.29486e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.00581,57127 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.06421,57085 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.791,137046 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.97337,57218 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.98056,57313 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.19512,57294 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.20489,57142 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.99914,56989 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,365.507,2.19674e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.5339,340298 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,40.5251,267754 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1070.58,6.06614e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,811.869,4.21348e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,663.055,3.50782e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.904,435045 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.1423,199582 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.7964,86630 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,63.4149,367588 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_121.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_121.txt deleted file mode 100644 index 
507ccfb18484d8336db7f85b3ff3f53e610508dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_121.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,463.303,1.76918e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.80061,37657 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.71535,37676 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1139.3,4.77571e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.14229,44608 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.77874,44586 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.2747,177316 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,356.378,1.68966e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.33614,47132 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.78242,47148 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,590.817,2.9326e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.39963,49955 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.62117,49993 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,26.2844,149031 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,326.912,1.76844e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.53396,52247 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.34178,52190 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,571.489,3.25395e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,4.07786,55086 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.44709,55007 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9743,132412 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.31362,59774 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.91106,55127 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.93124,55053 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.3293,55058 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.67543,54889 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.478,2.02206e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.9578,311755 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.6067,226535 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,837.679,4.63e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,551.053,2.95982e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,572.592,3.14542e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.4555,417885 
-tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,28.045,166902 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,7.9946,56776 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.8845,306039 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_122.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_122.txt deleted file mode 100644 index a88dd7e64b895743978a7dc30a73a0dbc7aea168..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_122.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,462.507,1.77319e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.18257,37772 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.31061,37810 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1138.87,4.79222e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.09867,44486 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.02958,44501 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,34.534,186204 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,367.376,1.75098e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.00658,47240 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.24613,47236 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,584.331,2.91639e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.4093,49994 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.35611,50087 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.1279,165066 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,341.834,1.85874e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.37009,56433 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.06434,52144 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,561.96,3.1899e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.12967,54906 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.80225,54921 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,20.1178,137144 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.8846,54990 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.87263,55009 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.37842,55013 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 
-ArgMax1,4.92486,54860 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.98142,54764 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.552,2.05582e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.4482,316215 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,40.3342,252489 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,857.765,4.76229e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,559.232,2.9486e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,576.081,3.16067e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,75.9264,439079 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.1708,171763 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,7.90801,62038 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.8385,322044 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_123.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_123.txt deleted file mode 100644 index f625708f35b3d10af1ac943760493f463cac08c1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_123.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,437.953,1.66532e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.41153,37296 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.36523,37315 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1121.87,4.74012e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.36126,44302 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.44811,44322 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.7631,190062 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,365.679,1.77306e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,3.03326,46930 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.35995,46949 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,584.447,2.96575e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.73886,49742 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.20453,49776 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,23.884,149630 -Pool2_f2h,0,0 
-Pool2_h2f,0,0 -Conv5,340.073,1.85428e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.46027,51768 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.40834,51768 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,549.789,3.10959e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.78392,54594 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.88946,54690 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.7755,126361 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.65385,59408 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.75634,54797 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.20123,54722 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.66834,54495 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.80535,54419 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,402.055,2.27725e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,73.3859,423786 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.3935,313453 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1248.44,6.83024e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,819.675,4.31134e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,842.22,4.67499e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,99.7207,569478 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.8504,250959 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.4542,68004 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,75.1463,426502 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_124.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_124.txt deleted file mode 100644 index f4a26c1fd504dbd5da22ca2e15282635494674e4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_124.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,433.029,1.66026e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.79847,37526 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.94894,37545 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1115.5,4.67817e+06 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.74805,44293 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.51986,44234 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.6099,177747 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,350.715,1.66186e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.03669,46972 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.69615,47045 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,575.575,2.86038e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.9667,44899 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.67573,44937 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,23.9632,143993 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,341.352,1.82392e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.54861,51957 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.10808,51977 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,553.307,3.13107e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.85675,54603 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.11919,54638 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.0234,120596 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.12863,59638 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.96779,55014 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.03931,54957 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.27804,54671 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.67854,54499 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,414.334,2.34711e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,81.1861,472485 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,57.9362,345585 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1376.77,7.54063e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,905.412,4.78984e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,925.723,5.1848e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,109.2,632711 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,44.4932,264202 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.8511,84914 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,81.544,476016 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_125.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_125.txt deleted file mode 100644 index 7804c699e58b893ba877de2f4fa22972df010d24..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_125.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,516.605,2.02074e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.08455,38686 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.16321,38724 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1442.45,6.28461e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.1052,47107 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.93118,47124 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.9007,173007 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,504.737,2.53152e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.44888,50485 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.63887,50523 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,818.749,4.35636e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.83912,53918 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.95569,59116 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3683,183554 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,350.651,2.05332e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.03086,55901 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.18891,55960 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,581.055,3.53253e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.62206,58706 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.09442,58630 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9037,141471 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.75583,64099 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.82994,58688 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.5515,58688 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.42665,58688 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.3056,58611 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,361.732,2.19452e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,50.9245,331948 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.0977,241075 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1033.24,6.05109e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Conv8,686.711,3.9161e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,696.342,4.00968e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,67.7964,400530 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.1881,210799 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.8773,69820 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.9077,350084 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_126.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_126.txt deleted file mode 100644 index ea6400d03aa3ad71cac15db58e32481da2483845..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_126.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,512.889,2.01624e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.56292,38992 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.40367,39049 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1431.3,6.29537e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.60188,47503 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.86101,47420 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.7532,175943 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,516.412,2.62336e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.93544,50769 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.83394,50788 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,856.837,4.57126e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.06059,54186 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.71297,54205 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.5667,177722 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,354.288,2.10221e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.15538,56361 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.41349,56361 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,589.817,3.61846e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.8827,58957 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,3.13194,58976 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.2826,135265 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,5.32367,59049 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.12677,59106 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,5.95445,59163 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.22271,58840 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.30033,58783 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.258,2.19612e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,51.0832,328802 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.5998,249477 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1048.34,6.18942e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,697.459,4.03406e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,709.916,4.14341e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,69.5421,420211 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.7712,195854 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.08736,65243 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.317,345781 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_127.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_127.txt deleted file mode 100644 index 11dacf885883227c5be90ef5b5ad89f123c6482c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_127.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,513.644,2.01811e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.59313,38800 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.09014,38819 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1459.44,6.36232e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.52418,47452 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.50447,47425 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.1814,170372 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,506.56,2.53914e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.60098,50870 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.42729,50870 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,864.817,4.56692e+06 -Conv4_f2h,0,0 
-Conv4_h2f,0,0 -Add4,2.51477,54332 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.582,54385 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.9645,184505 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,371.805,2.18465e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.42529,56397 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.48629,56397 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,621.968,3.79972e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.37768,59376 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.3429,59258 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.0982,142744 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.49858,64460 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.76738,59487 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.03665,59411 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.6974,59011 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.14829,58801 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,365.736,2.25533e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.5463,346146 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.5809,265556 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1101.98,6.49844e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,732.589,4.23126e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,745.956,4.34033e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.0537,440311 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.4142,216953 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.3084,69893 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.1276,339492 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_128.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_128.txt deleted file mode 100644 index cf76f32f4ceb561231d6ed75a16ba374bb568cdc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_128.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,490.555,1.91277e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.76923,38268 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.96098,38249 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1422.48,6.18362e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.65071,46548 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.8917,46539 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.8054,181739 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,502.785,2.53056e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.79368,49969 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.61884,50007 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,822.43,4.37771e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.58405,53441 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.36056,53498 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.7038,176948 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,361.565,2.11351e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.89698,55445 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.83042,55502 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,585.627,3.53768e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.72002,58036 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.98949,58112 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.8197,127234 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.31596,58169 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.9045,58284 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.18161,58149 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.77481,57881 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.7898,57805 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.041,2.17864e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.103,339081 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.7997,261123 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1104.5,6.39538e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,718.275,3.94992e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,740.786,4.25646e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.8935,457030 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.3358,210152 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.5654,69967 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.8306,347219 -tensorMap22_f2h,0,0 
-tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_129.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_129.txt deleted file mode 100644 index a6f28a7c602e85be94e5f27feb7a652c82c483c8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_129.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,488.551,1.89905e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.07681,38286 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.20149,38321 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1422.93,6.17546e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.76018,46702 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.88731,46640 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.1803,176017 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,519.02,2.61002e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.01589,50031 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.83906,50050 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,853.518,4.55277e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.62392,53482 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.36114,53516 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.9432,177270 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,359.421,2.09654e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.21755,55503 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.3411,55390 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,584.796,3.46279e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.82331,58295 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.24235,58232 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.4951,140724 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.48997,63602 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.69624,58402 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.33364,58325 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.03267,58021 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.19041,58021 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,357.807,2.07422e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.1799,335929 -tensorReduce1_f2h,0,0 
-tensorReduce1_h2f,0,0 -tensorMap11,39.9407,245484 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1110.51,6.37089e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,724.779,4.04036e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,747.887,4.3069e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.0584,435819 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,33.3917,209657 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.0744,64045 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.0774,348729 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_130.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_130.txt deleted file mode 100644 index 636e1870792259d4903ac8708a3c06e07c5ba4c0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_130.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,489.265,1.90801e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.08052,38420 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.5533,38458 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1444.06,6.23284e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.61314,46778 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.26415,46807 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.4642,174262 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,507.373,2.5221e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.76619,49995 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.34719,50033 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,852.637,4.48079e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.97144,53488 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.44322,53431 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.6332,192769 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,370.72,2.17036e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.90097,55529 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.18888,55396 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,622.862,3.76753e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.12874,58209 
-Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.49246,58225 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.269,128353 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.09938,63426 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.99973,58434 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.08824,58358 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.12934,58185 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.18916,58185 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,363.332,2.21334e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.9078,336453 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,40.3138,262016 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1166.64,6.79804e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,759.293,4.23371e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,778.059,4.47344e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.8648,457931 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,33.4184,215316 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.80722,64270 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.8009,337244 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_131.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_131.txt deleted file mode 100644 index 50c3eb9ec42d079ea23479abe45ca801abe3360d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_131.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,522.794,2.04511e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.73265,38878 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.18905,38839 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1523.97,6.65219e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.6038,47678 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.24581,47691 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.8813,166415 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,533.498,2.69408e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,1.74501,51110 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.20937,51126 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,942.318,4.9949e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.15937,54300 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.61883,54300 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,31.5194,201945 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,371.802,2.20157e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.64565,56288 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.31787,56326 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,612.351,3.74904e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.79723,59205 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.43208,59202 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.1139,124537 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.90027,69758 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.78789,59316 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.70946,59240 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.87174,52934 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.19189,52934 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,366.553,2.23326e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,55.254,352097 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.9132,271326 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1214.21,7.11476e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,801.81,4.56785e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,812.935,4.72435e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.2519,429776 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3433,201451 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.90617,92328 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.6299,357340 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_132.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_132.txt deleted file mode 100644 index d2c262d04f052a8a164878434bea1ef52da3b321..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_132.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,525.37,2.06192e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.96747,38705 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.06894,38629 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1520.58,6.66722e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.01519,47444 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.61154,47444 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.7819,170612 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,536.154,2.72908e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.54613,51080 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.84213,51191 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,932.92,4.99892e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.20914,54418 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.4404,54476 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.1968,197082 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.303,2.1788e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.35217,56422 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.98562,56365 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,610.051,3.74586e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.97608,59053 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.88159,59053 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.3418,130176 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.61065,64309 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.90408,59259 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.24766,59183 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.49644,58975 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.90756,58818 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,378.999,2.3384e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.0377,338169 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.9645,264915 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1200.99,7.04572e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,787.969,4.44044e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,804.791,4.6697e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.1626,432984 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.0567,210482 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.88435,75320 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.4652,354827 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_133.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_133.txt deleted file mode 100644 index e8443470d82519ee6382e895f49dddd6ad947205..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_133.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,520.888,2.04579e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.47944,38877 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.12322,38972 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1514.43,6.63255e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.56204,47815 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07541,47828 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,28.3388,163490 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,527.728,2.63761e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.54604,51022 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.10674,51096 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,957.354,5.10974e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.78369,54366 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.50894,54377 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.6448,202412 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.438,2.18595e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.01608,56323 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.23202,56400 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,616.571,3.78567e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.00635,59071 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.06529,59163 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.1188,129509 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.1254,59201 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.01387,59296 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.6084,59201 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.34773,59049 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 
-Select1,8.6952,64896 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,355.626,2.21807e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.0829,340629 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.1219,259621 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1158.3,6.83392e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,761.462,4.27722e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,778.415,4.54375e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,70.3622,430421 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3293,207125 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1576,86781 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.7575,330000 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_134.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_134.txt deleted file mode 100644 index f156a272bc0eca63a6594e27aa836509f288799c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_134.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,518.225,2.03857e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.76772,38705 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.54213,38839 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1511.44,6.60475e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64194,47527 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.6677,47581 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.5123,175937 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,531.752,2.68169e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.50665,51028 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.54812,51083 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,955.147,5.09862e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.28308,54426 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.75832,54345 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.6361,202254 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.801,2.18166e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,2.3061,56250 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.37115,56270 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,613.327,3.75759e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.20113,59033 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.25854,58995 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.5104,129265 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.39858,59068 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.01976,59087 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.26823,59087 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.39263,59087 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.12753,58550 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,353.751,2.19414e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.7501,344014 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.7621,259321 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1233.71,7.25328e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,810.47,4.59346e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,829.939,4.82735e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.6112,439468 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.6467,205827 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.57212,81121 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.215,356117 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_135.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_135.txt deleted file mode 100644 index d93aac33b8164425eb51d4dc814e2e054f163ae5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_135.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,497.796,1.93042e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.85409,38268 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.4189,38306 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1510.04,6.51319e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.77071,47233 -Add2_f2h,0,0 
-Add2_h2f,0,0 -Tanh2,1.40694,47306 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.0603,156603 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,532.12,2.66163e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.06754,50490 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.41295,50490 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,944.021,4.99783e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.60625,53848 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.67332,53906 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.0045,183834 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,370.626,2.18061e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.13735,55695 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94213,55733 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,611.587,3.70482e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.46552,58631 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.97964,58419 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.2842,122698 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.12955,63752 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.06546,58742 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.23947,63617 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.50137,58281 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.79949,58186 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,346.122,2.12271e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,48.9698,309221 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,35.7981,229550 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1113.87,6.51033e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,721.95,4.05898e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,743.515,4.2954e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,68.1563,409251 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.4963,188728 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1106,69993 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,54.0631,321801 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_136.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_136.txt deleted file mode 100644 index 0b546e90369714ce255fdcfdf59eaaa4ad1cfbf9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_136.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,500.05,1.94081e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.03905,38192 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.37129,38211 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1499.93,6.50211e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.79157,47249 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.8422,47278 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.4395,171501 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,527.901,2.63924e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.83384,50487 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.70271,50523 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,932.858,4.94098e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.81124,53920 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.28826,53958 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.963,178504 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,374.848,2.20488e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.36002,55907 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.26815,55963 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,619.314,3.75355e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.53089,58496 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.90792,58512 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.6418,134224 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.1248,58722 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.16875,58741 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.60833,63770 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,6.17848,58588 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.31312,58186 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.901,2.19902e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.4904,348181 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.9841,258346 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1228.34,7.1658e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Conv8,798.616,4.50051e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,818.663,4.72786e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.3452,442673 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.4503,215227 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.604,91273 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.0678,353514 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_137.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_137.txt deleted file mode 100644 index 18601a156bdc8c142870fd2d9555bd3ee59032bc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_137.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,494.673,1.94348e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.66638,38664 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.33449,38588 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1494.56,6.44786e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.65723,47325 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.41615,47359 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,29.287,165234 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,522.499,2.58653e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.65298,50408 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.17503,50466 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,956.173,4.99662e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.09796,53805 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.70279,53787 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.8641,194485 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,368.763,2.16565e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.92277,55739 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94981,55758 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,618.426,3.73154e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.29106,58531 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.45537,58531 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.5694,117081 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,6.14133,63675 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.84811,58722 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.87297,58645 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.98089,58472 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.19059,58242 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,356.023,2.18003e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.9909,349138 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,41.5442,275170 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1235.38,7.14779e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,813.297,4.61604e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,816.186,4.7365e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.9996,429052 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.682,212127 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.3575,97760 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.9096,356092 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_138.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_138.txt deleted file mode 100644 index 18e2ee76b7ca35b0a962cc80809c02903390cddb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_perf_138.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,493.702,1.93113e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.48485,38344 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.38601,38382 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1496.88,6.37549e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.5277,47233 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.27279,42369 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.069,166361 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,518.673,2.5721e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.63791,50474 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.15097,50529 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,951.896,4.78278e+06 -Conv4_f2h,0,0 
-Conv4_h2f,0,0 -Add4,1.89698,53722 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.26968,53747 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,31.9208,199983 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,369.81,2.17044e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.24215,55713 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.09726,55636 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,622.511,3.7004e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.03233,58453 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.14261,58376 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.5437,133799 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.21081,58586 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.81855,58663 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.61397,58509 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.59887,58261 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.24077,58185 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,347.875,2.13694e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.3979,342073 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.5801,251027 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1193.2,6.96721e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,781.872,4.35658e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,800.611,4.63655e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.1664,436793 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.8312,199875 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1147,75365 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,54.928,332389 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_41.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_41.txt deleted file mode 100644 index 070e97bed17bb07898d33e45d7ea90d12f52461f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_41.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,504.874,1.93218e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.2764,37870 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.84478,37889 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1821.15,7.86918e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.54677,48007 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.33131,48083 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.4474,181601 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,818.136,4.06536e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.06821,50905 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.92738,50924 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1332.75,7.09964e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.33106,54383 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.84814,54380 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.1628,179710 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,326.011,1.91353e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.13013,56167 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.2812,56186 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,542.389,3.25438e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.66892,58802 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.12238,58840 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9176,141070 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.40329,64347 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.65055,58916 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.26913,58916 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.00998,58802 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.06369,58631 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.221,2.20974e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,39.7362,254622 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.5187,230487 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1030.71,6.01365e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,594.95,3.36356e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,557.701,3.13541e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.117,434533 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,21.496,157302 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.77318,59332 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,60.2474,367981 -tensorMap22_f2h,0,0 
-tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_43.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_43.txt deleted file mode 100644 index adee4a649a5230b37c713106248be75c1d031a52..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_43.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,512.584,1.97652e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.32862,38022 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,3.20465,37983 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1806.09,7.89462e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.81548,48135 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.55054,48212 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.4837,203280 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,808.325,4.12335e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.66647,51342 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.69652,51285 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1131.13,6.20321e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.82613,55541 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.96615,55541 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3146,183952 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,322.36,1.94429e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.08904,57274 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.06011,57350 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,541.034,3.34225e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.92543,59851 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.17311,59867 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9872,149334 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.56032,59981 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.84616,59904 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.16059,59904 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.07836,59561 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.96989,59504 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,366.098,2.28511e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,39.3923,252201 
-tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.8196,240015 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1017.86,6.04683e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,617.427,3.51735e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,583.057,3.35479e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.9926,441140 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,22.9876,163395 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.987,64947 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,62.2584,391012 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_45.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_45.txt deleted file mode 100644 index 151d408eed7b2ed7f739d5916eac202a3cf2fec2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_red_samp_45.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,502.579,1.9411e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.98366,38272 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.89252,38310 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1802.71,7.87184e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.703,48252 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.11467,48291 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,34.2146,192488 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,780.652,3.96253e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.01461,51323 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.35058,51343 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1158.69,6.33308e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.46347,55465 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.77848,55541 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3453,194165 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,327.96,1.96248e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.31067,57084 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.46417,57084 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,542.253,3.3329e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add6,1.80732,59851 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.14159,59793 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.8409,143814 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.34982,65415 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.65468,59888 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.04942,59888 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.50226,59622 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.53594,65470 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.603,2.2467e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,45.8217,286536 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.9513,239505 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1017.63,6.04958e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,601.659,3.44212e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,576.172,3.33753e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.0101,448507 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,26.2698,164121 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.7095,60231 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,61.3979,381951 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_231.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_231.txt deleted file mode 100644 index b1900b41c2d7c78e6b425d7b3e2f52f8c3f08606..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_231.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,312.955,1.05814e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.29102,32634 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.92555,32750 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1270.25,4.95079e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64185,42084 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.93026,42046 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.9684,164910 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,375.799,1.72684e+06 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.62242,45265 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.86757,45437 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,628.419,3.06087e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.55881,49018 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.97983,49095 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,24.1804,147478 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,236.215,1.27624e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.07611,50990 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.91714,51009 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,416.23,2.3139e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.9468,53167 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.39957,53167 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.4841,128208 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.49903,58078 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.9957,53450 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.69527,53298 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.90681,52994 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.96046,52917 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,398.579,2.22238e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,75.6271,418107 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.4082,301014 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2181.08,1.18919e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1541.07,7.72153e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1518.65,7.74613e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,138.242,667792 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,43.985,200120 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,16.4329,76086 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,126.613,598657 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_232.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_232.txt deleted file mode 100644 index acea102c80b68b7a0c693135d1a2bd9dc6dbb418..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_232.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,320.765,1.0895e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.74497,32444 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.91287,32520 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1271.88,4.91642e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.52866,42009 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.32443,42066 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.5942,164724 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,374.875,1.70587e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.75916,45227 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.94523,45284 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,631.129,3.06489e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.68424,49132 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.28821,48995 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.4421,157819 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,231.786,1.24844e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.17838,50775 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.17326,50736 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,418.555,2.32751e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.8323,52971 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.43531,53009 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.7868,122228 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.95036,53082 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.27055,53253 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.92836,57843 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.84128,52968 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.47956,52968 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,408.421,2.2672e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,76.7764,449207 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,54.9257,305927 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2268.42,1.23557e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1680.26,8.35215e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1579.97,8.0778e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,134.762,654124 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,42.9408,220383 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.0336,66829 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,119.188,566439 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_233.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_233.txt deleted file mode 100644 index 47aa85aa79ce2df08664ec1ae85b31013416baf5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_233.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,364.281,1.42832e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.01979,37783 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.91876,37859 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1724.95,7.72746e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.61807,48656 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.72616,48711 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.8694,198585 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,485.892,2.51283e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.7669,51802 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.50969,51876 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,882.543,4.87772e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.41816,55778 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.91368,55832 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.075,201041 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,301.008,1.83866e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.06062,57411 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.58459,57355 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,558.599,3.47618e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.79893,59907 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.99259,59945 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.0353,143544 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.68991,65276 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.00786,60020 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.53729,60020 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.20431,59634 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 
-Select1,7.20196,59403 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,364.101,2.26809e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,56.8705,359483 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,41.6894,285564 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1542.04,9.30453e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,983.446,5.5907e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1027.43,6.08045e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.4432,458940 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.5815,196982 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.17091,76617 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,61.1893,380972 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_234.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_234.txt deleted file mode 100644 index 19f49e9d486a58accb45ef9903d341e8b9585c09..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_234.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,359.171,1.3654e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.56808,36755 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.57861,36830 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1720.43,7.51741e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.55141,48008 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.80565,47875 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,29.423,167745 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,477.2,2.42612e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.41721,51394 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.20194,51432 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,887.624,4.77616e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.74392,55249 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.76305,55283 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.7709,205785 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,301.656,1.81816e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,1.88015,57099 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.21406,57041 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,562.265,3.49027e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.89061,59568 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.32229,59603 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.4432,119295 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.97577,65011 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.85704,59733 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.36932,64953 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.80956,59428 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.87399,59198 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,354.283,2.21567e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.9692,353895 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.3985,273572 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1682.64,1.00985e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,966.772,5.42835e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,994.348,5.79107e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,82.3764,478623 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.1803,209638 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.9806,96473 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,66.7879,398824 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_235.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_235.txt deleted file mode 100644 index f5857d3264f277810e2ca7bd7bde7cfd7f1d46cd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_235.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,359.179,1.2212e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.8443,33172 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.75128,33192 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1725.86,7.02108e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.15976,45546 
-Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.74376,45603 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.1932,192282 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,486.73,2.42326e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.32251,49345 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.11051,49344 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,874.152,4.65541e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.14318,53834 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.4425,53834 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.2372,171817 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,304.18,1.78209e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.93003,55659 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.31419,55659 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,556.548,3.35208e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.22421,58182 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.0956,58217 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.3645,116615 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.83999,63656 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.86607,58438 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,5.43334,58286 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.87446,57981 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.7042,57854 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,420.697,2.53764e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,85.3445,534738 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,60.8316,387686 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,3076.32,1.7793e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,2322.36,1.18957e+07 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1926.57,9.90526e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,147.311,701287 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,47.9584,230534 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.2147,71974 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,134.215,637238 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_236.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_236.txt deleted file mode 100644 index b38950da77e2c0ae11b36cc570e8a3c64b7c100b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_236.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,369.184,1.30472e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.83851,34353 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.51925,34373 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1814.45,7.54365e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.95391,46542 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.14664,46540 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.855,187012 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,507.61,2.53206e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.52722,49910 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.48972,50117 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,946.582,5.02218e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.71148,54187 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.50529,54206 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,25.3727,162878 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,339.38,1.9942e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.9773,56181 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.00066,56181 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,600.8,3.65449e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.04114,58747 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.99851,58760 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.3574,159457 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.00924,64151 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.56626,58838 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.17109,58838 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.10332,58765 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.09669,58536 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,416.965,2.55865e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,76.3945,481875 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.5784,341610 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2759.9,1.6179e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Conv8,1799.2,9.41621e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1565.91,8.37989e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,126.676,649481 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.5973,232891 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.366,69394 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,110.356,555019 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_237.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_237.txt deleted file mode 100644 index 3c5e6c169a08069d304782a66cf2b965c55e2931..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_237.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,366.675,1.45154e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.87509,37821 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.80517,37879 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1807.03,8.02866e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.63839,48695 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.56049,48752 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.7969,185595 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,512.094,2.68803e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.58895,52031 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.87138,52047 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,941.746,5.20451e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.13765,55821 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.10807,55782 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.328,190552 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,330.901,2.01841e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.17483,57648 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.25227,57686 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,595.453,3.71972e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.89838,60127 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.00248,60085 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.9617,138566 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,5.71676,65494 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.72594,60254 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.70641,65417 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.9998,59796 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.73671,59723 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.565,2.24855e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,51.6014,332793 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.4462,251934 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1604.66,9.70904e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,862.655,4.92921e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,894.783,5.32491e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,69.249,427305 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.0203,213623 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.1213,98381 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,55.7902,348237 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_238.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_238.txt deleted file mode 100644 index dbc124748794072b323ece1525a773ec28ea633e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_238.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,361.396,1.42936e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.077,38038 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.85547,38057 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1801.71,8.07173e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.50293,48844 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.82636,48860 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.1758,175635 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,505.379,2.6346e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.64517,52142 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.96376,52219 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,943.341,5.17271e+06 -Conv4_f2h,0,0 
-Conv4_h2f,0,0 -Add4,1.88607,56170 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.52344,56131 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.3298,184853 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,332.621,2.02922e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.81202,57654 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94287,57596 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,597.906,3.72187e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.1228,60207 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.09502,60223 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.2042,151258 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.2175,65703 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.67349,60463 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.27326,60313 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.12176,60010 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.77034,59780 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,332.125,2.08543e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,40.8915,282376 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,31.3805,224408 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1229.22,7.34539e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,658.089,3.74298e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,677.838,3.98656e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,56.4301,354013 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,25.7725,165318 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.87913,71131 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,45.0084,284986 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_239.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_239.txt deleted file mode 100644 index cc13932a92b884c0e4a74dda6d2bd25edbadd7bb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp16/alexnet_canny_samp_239.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,365.933,1.40138e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.80164,36850 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.55448,36888 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1806.48,7.93584e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.88536,48163 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.64411,48201 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.6716,207285 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,512.369,2.65332e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.04005,51632 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.16469,51647 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,935.445,5.14346e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.03416,55351 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.77188,55389 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.7423,188823 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,338.056,2.03138e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.87183,57064 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.23042,57199 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,592.54,3.65101e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.68578,59431 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.24219,59489 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.0715,124954 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.97676,64936 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.76684,59600 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.85672,59600 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.97948,59450 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.37134,59450 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,354.286,2.1935e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.0783,348349 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.247,244872 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1792.85,1.07184e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1012.73,5.58673e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1045.6,5.94124e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,83.6212,481961 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.2344,206842 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.1349,73456 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,69.2339,398246 -tensorMap22_f2h,0,0 
-tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_120.txt deleted file mode 100644 index f00d5f1ed2fbbe2687b34d1056b0e8f710c876c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_120.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,539.627,1.99043e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,4.16915,36437 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.89879,36514 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,2027.43,8.19658e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.94056,46777 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.5858,46796 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,36.2756,195821 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,950.176,4.46438e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.26696,49224 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.48219,49240 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1414.68,7.23013e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.54139,52507 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.93207,52526 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.4147,178967 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,337.686,1.93166e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.38756,54394 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.20894,54394 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,562.317,3.29486e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.00581,57127 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.06421,57085 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.791,137046 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.97337,57218 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.98056,57313 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.19512,57294 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.20489,57142 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.99914,56989 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,365.507,2.19674e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.5339,340298 -tensorReduce1_f2h,0,0 
-tensorReduce1_h2f,0,0 -tensorMap11,40.5251,267754 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1070.58,6.06614e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,811.869,4.21348e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,663.055,3.50782e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.904,435045 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.1423,199582 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.7964,86630 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,63.4149,367588 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_121.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_121.txt deleted file mode 100644 index 507ccfb18484d8336db7f85b3ff3f53e610508dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_121.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,463.303,1.76918e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.80061,37657 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.71535,37676 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1139.3,4.77571e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.14229,44608 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.77874,44586 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.2747,177316 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,356.378,1.68966e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.33614,47132 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.78242,47148 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,590.817,2.9326e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.39963,49955 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.62117,49993 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,26.2844,149031 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,326.912,1.76844e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.53396,52247 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.34178,52190 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,571.489,3.25395e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 
-Add6,4.07786,55086 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.44709,55007 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9743,132412 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.31362,59774 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.91106,55127 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.93124,55053 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.3293,55058 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.67543,54889 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.478,2.02206e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.9578,311755 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.6067,226535 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,837.679,4.63e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,551.053,2.95982e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,572.592,3.14542e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.4555,417885 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,28.045,166902 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,7.9946,56776 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.8845,306039 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_122.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_122.txt deleted file mode 100644 index a88dd7e64b895743978a7dc30a73a0dbc7aea168..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_122.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,462.507,1.77319e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.18257,37772 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.31061,37810 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1138.87,4.79222e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.09867,44486 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.02958,44501 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,34.534,186204 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,367.376,1.75098e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,2.00658,47240 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.24613,47236 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,584.331,2.91639e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.4093,49994 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.35611,50087 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.1279,165066 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,341.834,1.85874e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.37009,56433 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.06434,52144 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,561.96,3.1899e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.12967,54906 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.80225,54921 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,20.1178,137144 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.8846,54990 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.87263,55009 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.37842,55013 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.92486,54860 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.98142,54764 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.552,2.05582e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.4482,316215 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,40.3342,252489 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,857.765,4.76229e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,559.232,2.9486e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,576.081,3.16067e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,75.9264,439079 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.1708,171763 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,7.90801,62038 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.8385,322044 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_123.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_123.txt deleted file mode 100644 index f625708f35b3d10af1ac943760493f463cac08c1..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_123.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,437.953,1.66532e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.41153,37296 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.36523,37315 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1121.87,4.74012e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.36126,44302 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.44811,44322 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.7631,190062 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,365.679,1.77306e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,3.03326,46930 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.35995,46949 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,584.447,2.96575e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.73886,49742 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.20453,49776 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,23.884,149630 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,340.073,1.85428e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.46027,51768 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.40834,51768 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,549.789,3.10959e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.78392,54594 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.88946,54690 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.7755,126361 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.65385,59408 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.75634,54797 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.20123,54722 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.66834,54495 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.80535,54419 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,402.055,2.27725e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,73.3859,423786 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.3935,313453 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1248.44,6.83024e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,819.675,4.31134e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,842.22,4.67499e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,99.7207,569478 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.8504,250959 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.4542,68004 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,75.1463,426502 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_124.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_124.txt deleted file mode 100644 index f4a26c1fd504dbd5da22ca2e15282635494674e4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_124.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,433.029,1.66026e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.79847,37526 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.94894,37545 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1115.5,4.67817e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.74805,44293 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.51986,44234 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.6099,177747 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,350.715,1.66186e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.03669,46972 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.69615,47045 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,575.575,2.86038e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.9667,44899 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.67573,44937 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,23.9632,143993 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,341.352,1.82392e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.54861,51957 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.10808,51977 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,553.307,3.13107e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.85675,54603 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.11919,54638 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.0234,120596 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.12863,59638 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.96779,55014 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.03931,54957 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.27804,54671 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 
-Select1,7.67854,54499 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,414.334,2.34711e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,81.1861,472485 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,57.9362,345585 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1376.77,7.54063e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,905.412,4.78984e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,925.723,5.1848e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,109.2,632711 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,44.4932,264202 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.8511,84914 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,81.544,476016 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_125.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_125.txt deleted file mode 100644 index 7804c699e58b893ba877de2f4fa22972df010d24..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_125.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,516.605,2.02074e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.08455,38686 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.16321,38724 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1442.45,6.28461e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.1052,47107 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.93118,47124 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.9007,173007 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,504.737,2.53152e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.44888,50485 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.63887,50523 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,818.749,4.35636e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.83912,53918 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.95569,59116 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3683,183554 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,350.651,2.05332e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,3.03086,55901 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.18891,55960 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,581.055,3.53253e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.62206,58706 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.09442,58630 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9037,141471 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.75583,64099 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.82994,58688 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.5515,58688 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.42665,58688 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.3056,58611 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,361.732,2.19452e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,50.9245,331948 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.0977,241075 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1033.24,6.05109e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,686.711,3.9161e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,696.342,4.00968e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,67.7964,400530 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.1881,210799 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.8773,69820 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.9077,350084 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_126.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_126.txt deleted file mode 100644 index ea6400d03aa3ad71cac15db58e32481da2483845..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_126.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,512.889,2.01624e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.56292,38992 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.40367,39049 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1431.3,6.29537e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.60188,47503 -Add2_f2h,0,0 
-Add2_h2f,0,0 -Tanh2,1.86101,47420 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.7532,175943 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,516.412,2.62336e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.93544,50769 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.83394,50788 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,856.837,4.57126e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.06059,54186 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.71297,54205 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.5667,177722 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,354.288,2.10221e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.15538,56361 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.41349,56361 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,589.817,3.61846e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.8827,58957 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,3.13194,58976 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.2826,135265 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.32367,59049 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.12677,59106 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,5.95445,59163 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.22271,58840 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.30033,58783 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,351.258,2.19612e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,51.0832,328802 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.5998,249477 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1048.34,6.18942e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,697.459,4.03406e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,709.916,4.14341e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,69.5421,420211 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.7712,195854 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.08736,65243 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.317,345781 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_127.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_127.txt deleted file mode 100644 index 11dacf885883227c5be90ef5b5ad89f123c6482c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_127.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,513.644,2.01811e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.59313,38800 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.09014,38819 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1459.44,6.36232e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.52418,47452 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.50447,47425 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.1814,170372 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,506.56,2.53914e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.60098,50870 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.42729,50870 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,864.817,4.56692e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.51477,54332 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.582,54385 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.9645,184505 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,371.805,2.18465e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.42529,56397 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.48629,56397 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,621.968,3.79972e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.37768,59376 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.3429,59258 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.0982,142744 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.49858,64460 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.76738,59487 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.03665,59411 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.6974,59011 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.14829,58801 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,365.736,2.25533e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.5463,346146 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.5809,265556 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1101.98,6.49844e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Conv8,732.589,4.23126e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,745.956,4.34033e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.0537,440311 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.4142,216953 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.3084,69893 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.1276,339492 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_128.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_128.txt deleted file mode 100644 index cf76f32f4ceb561231d6ed75a16ba374bb568cdc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_128.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,490.555,1.91277e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.76923,38268 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.96098,38249 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1422.48,6.18362e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.65071,46548 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.8917,46539 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.8054,181739 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,502.785,2.53056e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.79368,49969 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.61884,50007 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,822.43,4.37771e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.58405,53441 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.36056,53498 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.7038,176948 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,361.565,2.11351e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.89698,55445 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.83042,55502 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,585.627,3.53768e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.72002,58036 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.98949,58112 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.8197,127234 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,5.31596,58169 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.9045,58284 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.18161,58149 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.77481,57881 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.7898,57805 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.041,2.17864e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.103,339081 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.7997,261123 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1104.5,6.39538e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,718.275,3.94992e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,740.786,4.25646e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.8935,457030 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.3358,210152 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.5654,69967 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,56.8306,347219 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_129.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_129.txt deleted file mode 100644 index a6f28a7c602e85be94e5f27feb7a652c82c483c8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_129.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,488.551,1.89905e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.07681,38286 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.20149,38321 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1422.93,6.17546e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.76018,46702 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.88731,46640 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.1803,176017 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,519.02,2.61002e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.01589,50031 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.83906,50050 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,853.518,4.55277e+06 -Conv4_f2h,0,0 
-Conv4_h2f,0,0 -Add4,2.62392,53482 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.36114,53516 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.9432,177270 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,359.421,2.09654e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.21755,55503 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.3411,55390 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,584.796,3.46279e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.82331,58295 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.24235,58232 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.4951,140724 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.48997,63602 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.69624,58402 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.33364,58325 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.03267,58021 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.19041,58021 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,357.807,2.07422e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.1799,335929 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.9407,245484 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1110.51,6.37089e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,724.779,4.04036e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,747.887,4.3069e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.0584,435819 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,33.3917,209657 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.0744,64045 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.0774,348729 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_130.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_130.txt deleted file mode 100644 index 636e1870792259d4903ac8708a3c06e07c5ba4c0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_130.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,489.265,1.90801e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.08052,38420 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.5533,38458 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1444.06,6.23284e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.61314,46778 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.26415,46807 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.4642,174262 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,507.373,2.5221e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.76619,49995 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.34719,50033 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,852.637,4.48079e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.97144,53488 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.44322,53431 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.6332,192769 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,370.72,2.17036e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.90097,55529 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.18888,55396 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,622.862,3.76753e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.12874,58209 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.49246,58225 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.269,128353 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.09938,63426 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.99973,58434 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.08824,58358 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.12934,58185 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.18916,58185 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,363.332,2.21334e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.9078,336453 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,40.3138,262016 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1166.64,6.79804e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,759.293,4.23371e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,778.059,4.47344e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.8648,457931 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,33.4184,215316 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.80722,64270 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.8009,337244 -tensorMap22_f2h,0,0 
-tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_131.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_131.txt deleted file mode 100644 index 50c3eb9ec42d079ea23479abe45ca801abe3360d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_131.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,522.794,2.04511e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.73265,38878 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.18905,38839 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1523.97,6.65219e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.6038,47678 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.24581,47691 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.8813,166415 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,533.498,2.69408e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.74501,51110 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.20937,51126 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,942.318,4.9949e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.15937,54300 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.61883,54300 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,31.5194,201945 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,371.802,2.20157e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.64565,56288 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.31787,56326 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,612.351,3.74904e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.79723,59205 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.43208,59202 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.1139,124537 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.90027,69758 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.78789,59316 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.70946,59240 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.87174,52934 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.19189,52934 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,366.553,2.23326e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,55.254,352097 -tensorReduce1_f2h,0,0 
-tensorReduce1_h2f,0,0 -tensorMap11,39.9132,271326 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1214.21,7.11476e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,801.81,4.56785e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,812.935,4.72435e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.2519,429776 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3433,201451 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.90617,92328 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,57.6299,357340 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_132.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_132.txt deleted file mode 100644 index d2c262d04f052a8a164878434bea1ef52da3b321..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_132.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,525.37,2.06192e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.96747,38705 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.06894,38629 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1520.58,6.66722e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.01519,47444 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.61154,47444 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.7819,170612 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,536.154,2.72908e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.54613,51080 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.84213,51191 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,932.92,4.99892e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.20914,54418 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.4404,54476 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.1968,197082 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.303,2.1788e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.35217,56422 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.98562,56365 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,610.051,3.74586e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.97608,59053 
-Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.88159,59053 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.3418,130176 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.61065,64309 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.90408,59259 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.24766,59183 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.49644,58975 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.90756,58818 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,378.999,2.3384e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.0377,338169 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.9645,264915 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1200.99,7.04572e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,787.969,4.44044e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,804.791,4.6697e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.1626,432984 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.0567,210482 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.88435,75320 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.4652,354827 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_133.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_133.txt deleted file mode 100644 index e8443470d82519ee6382e895f49dddd6ad947205..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_133.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,520.888,2.04579e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.47944,38877 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.12322,38972 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1514.43,6.63255e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.56204,47815 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.07541,47828 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,28.3388,163490 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,527.728,2.63761e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,1.54604,51022 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.10674,51096 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,957.354,5.10974e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.78369,54366 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.50894,54377 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.6448,202412 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.438,2.18595e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.01608,56323 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.23202,56400 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,616.571,3.78567e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.00635,59071 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.06529,59163 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.1188,129509 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.1254,59201 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.01387,59296 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.6084,59201 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.34773,59049 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.6952,64896 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,355.626,2.21807e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.0829,340629 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.1219,259621 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1158.3,6.83392e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,761.462,4.27722e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,778.415,4.54375e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,70.3622,430421 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.3293,207125 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1576,86781 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,53.7575,330000 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_134.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_134.txt deleted file mode 100644 index f156a272bc0eca63a6594e27aa836509f288799c..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_134.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,518.225,2.03857e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.76772,38705 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.54213,38839 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1511.44,6.60475e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64194,47527 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.6677,47581 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.5123,175937 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,531.752,2.68169e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.50665,51028 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.54812,51083 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,955.147,5.09862e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.28308,54426 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.75832,54345 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.6361,202254 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,367.801,2.18166e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.3061,56250 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.37115,56270 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,613.327,3.75759e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.20113,59033 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.25854,58995 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.5104,129265 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.39858,59068 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.01976,59087 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.26823,59087 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.39263,59087 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.12753,58550 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,353.751,2.19414e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.7501,344014 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.7621,259321 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1233.71,7.25328e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,810.47,4.59346e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,829.939,4.82735e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.6112,439468 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.6467,205827 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.57212,81121 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.215,356117 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_135.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_135.txt deleted file mode 100644 index d93aac33b8164425eb51d4dc814e2e054f163ae5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_135.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,497.796,1.93042e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.85409,38268 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.4189,38306 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1510.04,6.51319e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.77071,47233 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.40694,47306 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.0603,156603 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,532.12,2.66163e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.06754,50490 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.41295,50490 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,944.021,4.99783e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.60625,53848 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.67332,53906 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.0045,183834 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,370.626,2.18061e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.13735,55695 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94213,55733 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,611.587,3.70482e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.46552,58631 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.97964,58419 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.2842,122698 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.12955,63752 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.06546,58742 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.23947,63617 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.50137,58281 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 
-Select1,7.79949,58186 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,346.122,2.12271e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,48.9698,309221 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,35.7981,229550 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1113.87,6.51033e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,721.95,4.05898e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,743.515,4.2954e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,68.1563,409251 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,29.4963,188728 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1106,69993 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,54.0631,321801 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_136.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_136.txt deleted file mode 100644 index 0b546e90369714ce255fdcfdf59eaaa4ad1cfbf9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_136.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,500.05,1.94081e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.03905,38192 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.37129,38211 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1499.93,6.50211e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.79157,47249 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.8422,47278 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,31.4395,171501 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,527.901,2.63924e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.83384,50487 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.70271,50523 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,932.858,4.94098e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.81124,53920 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.28826,53958 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.963,178504 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,374.848,2.20488e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,2.36002,55907 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.26815,55963 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,619.314,3.75355e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.53089,58496 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.90792,58512 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.6418,134224 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.1248,58722 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.16875,58741 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.60833,63770 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,6.17848,58588 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.31312,58186 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.901,2.19902e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.4904,348181 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.9841,258346 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1228.34,7.1658e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,798.616,4.50051e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,818.663,4.72786e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.3452,442673 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.4503,215227 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.604,91273 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.0678,353514 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_137.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_137.txt deleted file mode 100644 index 18601a156bdc8c142870fd2d9555bd3ee59032bc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_137.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,494.673,1.94348e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.66638,38664 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.33449,38588 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1494.56,6.44786e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.65723,47325 
-Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.41615,47359 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,29.287,165234 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,522.499,2.58653e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.65298,50408 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.17503,50466 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,956.173,4.99662e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,3.09796,53805 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.70279,53787 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,29.8641,194485 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,368.763,2.16565e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.92277,55739 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94981,55758 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,618.426,3.73154e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.29106,58531 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.45537,58531 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.5694,117081 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,6.14133,63675 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.84811,58722 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.87297,58645 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.98089,58472 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.19059,58242 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,356.023,2.18003e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.9909,349138 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,41.5442,275170 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1235.38,7.14779e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,813.297,4.61604e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,816.186,4.7365e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,71.9996,429052 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.682,212127 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.3575,97760 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,58.9096,356092 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_138.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_138.txt deleted file mode 100644 index 18e2ee76b7ca35b0a962cc80809c02903390cddb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_perf_138.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,493.702,1.93113e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.48485,38344 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,1.38601,38382 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1496.88,6.37549e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.5277,47233 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.27279,42369 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,30.069,166361 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,518.673,2.5721e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.63791,50474 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.15097,50529 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,951.896,4.78278e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.89698,53722 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.26968,53747 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,31.9208,199983 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,369.81,2.17044e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.24215,55713 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.09726,55636 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,622.511,3.7004e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,3.03233,58453 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.14261,58376 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.5437,133799 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.21081,58586 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.81855,58663 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.61397,58509 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.59887,58261 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.24077,58185 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,347.875,2.13694e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,52.3979,342073 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.5801,251027 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1193.2,6.96721e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Conv8,781.872,4.35658e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,800.611,4.63655e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.1664,436793 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.8312,199875 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.1147,75365 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,54.928,332389 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_41.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_41.txt deleted file mode 100644 index 070e97bed17bb07898d33e45d7ea90d12f52461f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_41.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,504.874,1.93218e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.2764,37870 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.84478,37889 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1821.15,7.86918e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.54677,48007 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.33131,48083 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.4474,181601 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,818.136,4.06536e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.06821,50905 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.92738,50924 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1332.75,7.09964e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.33106,54383 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.84814,54380 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.1628,179710 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,326.011,1.91353e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.13013,56167 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.2812,56186 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,542.389,3.25438e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.66892,58802 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.12238,58840 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9176,141070 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,5.40329,64347 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.65055,58916 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.26913,58916 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.00998,58802 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.06369,58631 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,359.221,2.20974e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,39.7362,254622 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.5187,230487 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1030.71,6.01365e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,594.95,3.36356e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,557.701,3.13541e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,72.117,434533 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,21.496,157302 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.77318,59332 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,60.2474,367981 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_43.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_43.txt deleted file mode 100644 index adee4a649a5230b37c713106248be75c1d031a52..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_43.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,512.584,1.97652e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.32862,38022 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,3.20465,37983 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1806.09,7.89462e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.81548,48135 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.55054,48212 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.4837,203280 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,808.325,4.12335e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.66647,51342 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.69652,51285 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1131.13,6.20321e+06 
-Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.82613,55541 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.96615,55541 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3146,183952 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,322.36,1.94429e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.08904,57274 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.06011,57350 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,541.034,3.34225e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.92543,59851 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.17311,59867 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.9872,149334 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,4.56032,59981 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.84616,59904 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.16059,59904 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.07836,59561 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.96989,59504 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,366.098,2.28511e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,39.3923,252201 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,38.8196,240015 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1017.86,6.04683e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,617.427,3.51735e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,583.057,3.35479e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,73.9926,441140 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,22.9876,163395 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.987,64947 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,62.2584,391012 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_45.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_45.txt deleted file mode 100644 index 151d408eed7b2ed7f739d5916eac202a3cf2fec2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_red_samp_45.txt +++ /dev/null @@ -1,108 +0,0 @@ 
-Conv1,502.579,1.9411e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.98366,38272 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.89252,38310 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1802.71,7.87184e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.703,48252 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.11467,48291 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,34.2146,192488 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,780.652,3.96253e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.01461,51323 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.35058,51343 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,1158.69,6.33308e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.46347,55465 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.77848,55541 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.3453,194165 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,327.96,1.96248e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.31067,57084 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.46417,57084 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,542.253,3.3329e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.80732,59851 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.14159,59793 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.8409,143814 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.34982,65415 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.65468,59888 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.04942,59888 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.50226,59622 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,8.53594,65470 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.603,2.2467e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,45.8217,286536 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.9513,239505 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1017.63,6.04958e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,601.659,3.44212e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,576.172,3.33753e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,74.0101,448507 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,26.2698,164121 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.7095,60231 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,61.3979,381951 
-tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_231.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_231.txt deleted file mode 100644 index b1900b41c2d7c78e6b425d7b3e2f52f8c3f08606..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_231.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,312.955,1.05814e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.29102,32634 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.92555,32750 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1270.25,4.95079e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.64185,42084 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.93026,42046 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.9684,164910 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,375.799,1.72684e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.62242,45265 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.86757,45437 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,628.419,3.06087e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.55881,49018 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,1.97983,49095 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,24.1804,147478 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,236.215,1.27624e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.07611,50990 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.91714,51009 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,416.23,2.3139e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.9468,53167 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.39957,53167 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.4841,128208 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.49903,58078 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.9957,53450 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.69527,53298 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.90681,52994 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,6.96046,52917 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,398.579,2.22238e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,75.6271,418107 
-tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.4082,301014 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2181.08,1.18919e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1541.07,7.72153e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1518.65,7.74613e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,138.242,667792 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,43.985,200120 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,16.4329,76086 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,126.613,598657 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_232.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_232.txt deleted file mode 100644 index acea102c80b68b7a0c693135d1a2bd9dc6dbb418..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_232.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,320.765,1.0895e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.74497,32444 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.91287,32520 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1271.88,4.91642e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.52866,42009 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.32443,42066 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.5942,164724 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,374.875,1.70587e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.75916,45227 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.94523,45284 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,631.129,3.06489e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.68424,49132 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.28821,48995 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.4421,157819 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,231.786,1.24844e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.17838,50775 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.17326,50736 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,418.555,2.32751e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add6,2.8323,52971 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.43531,53009 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.7868,122228 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.95036,53082 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.27055,53253 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.92836,57843 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.84128,52968 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.47956,52968 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,408.421,2.2672e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,76.7764,449207 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,54.9257,305927 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2268.42,1.23557e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1680.26,8.35215e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1579.97,8.0778e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,134.762,654124 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,42.9408,220383 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.0336,66829 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,119.188,566439 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_233.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_233.txt deleted file mode 100644 index 47aa85aa79ce2df08664ec1ae85b31013416baf5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_233.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,364.281,1.42832e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.01979,37783 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.91876,37859 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1724.95,7.72746e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.61807,48656 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.72616,48711 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.8694,198585 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,485.892,2.51283e+06 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.7669,51802 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.50969,51876 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,882.543,4.87772e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.41816,55778 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.91368,55832 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.075,201041 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,301.008,1.83866e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,3.06062,57411 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.58459,57355 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,558.599,3.47618e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.79893,59907 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.99259,59945 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.0353,143544 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.68991,65276 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,2.00786,60020 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.53729,60020 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.20431,59634 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.20196,59403 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,364.101,2.26809e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,56.8705,359483 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,41.6894,285564 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1542.04,9.30453e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,983.446,5.5907e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1027.43,6.08045e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,76.4432,458940 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.5815,196982 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.17091,76617 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,61.1893,380972 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_234.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_234.txt deleted file mode 100644 index 19f49e9d486a58accb45ef9903d341e8b9585c09..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_234.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,359.171,1.3654e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.56808,36755 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.57861,36830 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1720.43,7.51741e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.55141,48008 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.80565,47875 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,29.423,167745 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,477.2,2.42612e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.41721,51394 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.20194,51432 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,887.624,4.77616e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.74392,55249 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.76305,55283 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,30.7709,205785 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,301.656,1.81816e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.88015,57099 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.21406,57041 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,562.265,3.49027e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.89061,59568 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.32229,59603 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.4432,119295 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.97577,65011 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.85704,59733 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.36932,64953 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.80956,59428 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.87399,59198 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,354.283,2.21567e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,53.9692,353895 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.3985,273572 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1682.64,1.00985e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,966.772,5.42835e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,994.348,5.79107e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,82.3764,478623 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.1803,209638 
-tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,10.9806,96473 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,66.7879,398824 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_235.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_235.txt deleted file mode 100644 index f5857d3264f277810e2ca7bd7bde7cfd7f1d46cd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_235.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,359.179,1.2212e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.8443,33172 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.75128,33192 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1725.86,7.02108e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,2.15976,45546 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.74376,45603 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.1932,192282 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,486.73,2.42326e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.32251,49345 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.11051,49344 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,874.152,4.65541e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.14318,53834 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.4425,53834 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.2372,171817 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,304.18,1.78209e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.93003,55659 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.31419,55659 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,556.548,3.35208e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.22421,58182 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.0956,58217 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.3645,116615 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.83999,63656 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.86607,58438 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,5.43334,58286 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.87446,57981 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.7042,57854 
-Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,420.697,2.53764e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,85.3445,534738 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,60.8316,387686 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,3076.32,1.7793e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,2322.36,1.18957e+07 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1926.57,9.90526e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,147.311,701287 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,47.9584,230534 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,14.2147,71974 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,134.215,637238 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_236.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_236.txt deleted file mode 100644 index b38950da77e2c0ae11b36cc570e8a3c64b7c100b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_236.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,369.184,1.30472e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.83851,34353 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.51925,34373 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1814.45,7.54365e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.95391,46542 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.14664,46540 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.855,187012 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,507.61,2.53206e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.52722,49910 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.48972,50117 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,946.582,5.02218e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.71148,54187 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.50529,54206 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,25.3727,162878 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,339.38,1.9942e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.9773,56181 
-Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.00066,56181 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,600.8,3.65449e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.04114,58747 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,1.99851,58760 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.3574,159457 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.00924,64151 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.56626,58838 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.17109,58838 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.10332,58765 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.09669,58536 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,416.965,2.55865e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,76.3945,481875 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,53.5784,341610 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,2759.9,1.6179e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1799.2,9.41621e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1565.91,8.37989e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,126.676,649481 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,41.5973,232891 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,12.366,69394 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,110.356,555019 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_237.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_237.txt deleted file mode 100644 index 3c5e6c169a08069d304782a66cf2b965c55e2931..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_237.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,366.675,1.45154e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,1.87509,37821 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.80517,37879 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1807.03,8.02866e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.63839,48695 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.56049,48752 
-Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,33.7969,185595 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,512.094,2.68803e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.58895,52031 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.87138,52047 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,941.746,5.20451e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.13765,55821 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,3.10807,55782 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.328,190552 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,330.901,2.01841e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,2.17483,57648 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.25227,57686 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,595.453,3.71972e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.89838,60127 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.00248,60085 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,17.9617,138566 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.71676,65494 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.72594,60254 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.70641,65417 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.9998,59796 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.73671,59723 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,358.565,2.24855e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,51.6014,332793 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,37.4462,251934 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1604.66,9.70904e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,862.655,4.92921e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,894.783,5.32491e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,69.249,427305 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,31.0203,213623 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.1213,98381 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,55.7902,348237 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_238.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_238.txt deleted file 
mode 100644 index dbc124748794072b323ece1525a773ec28ea633e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_238.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,361.396,1.42936e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,3.077,38038 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.85547,38057 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1801.71,8.07173e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.50293,48844 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,1.82636,48860 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,32.1758,175635 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,505.379,2.6346e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,1.64517,52142 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,1.96376,52219 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,943.341,5.17271e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,1.88607,56170 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.52344,56131 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,27.3298,184853 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,332.621,2.02922e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.81202,57654 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,1.94287,57596 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,597.906,3.72187e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,2.1228,60207 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.09502,60223 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,19.2042,151258 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.2175,65703 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.67349,60463 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,7.27326,60313 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,5.12176,60010 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.77034,59780 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,332.125,2.08543e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,40.8915,282376 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,31.3805,224408 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1229.22,7.34539e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,658.089,3.74298e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,677.838,3.98656e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-tensorMap21,56.4301,354013 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,25.7725,165318 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,9.87913,71131 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,45.0084,284986 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_239.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_239.txt deleted file mode 100644 index cc13932a92b884c0e4a74dda6d2bd25edbadd7bb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_canny_fp32/alexnet_canny_samp_239.txt +++ /dev/null @@ -1,108 +0,0 @@ -Conv1,365.933,1.40138e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,2.80164,36850 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,2.55448,36888 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,1806.48,7.93584e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.88536,48163 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,2.64411,48201 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool1,35.6716,207285 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,512.369,2.65332e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,2.04005,51632 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,2.16469,51647 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,935.445,5.14346e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,2.03416,55351 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,2.77188,55389 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Pool2,28.7423,188823 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,338.056,2.03138e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,1.87183,57064 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,2.23042,57199 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Conv6,592.54,3.65101e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,1.68578,59431 -Add6_f2h,0,0 -Add6_h2f,0,0 -Tanh6,2.24219,59489 -Tanh6_f2h,0,0 -Tanh6_h2f,0,0 -Pool3,18.0715,124954 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,5.97676,64936 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add7,1.76684,59600 -Add7_f2h,0,0 -Add7_h2f,0,0 -Softmax1,6.85672,59600 
-Softmax1_f2h,0,0 -Softmax1_h2f,0,0 -ArgMax1,4.97948,59450 -ArgMax1_f2h,0,0 -ArgMax1_h2f,0,0 -Select1,7.37134,59450 -Select1_f2h,0,0 -Select1_h2f,0,0 -Contract1,354.286,2.1935e+06 -Contract1_f2h,0,0 -Contract1_h2f,0,0 -tensorReduce1,54.0783,348349 -tensorReduce1_f2h,0,0 -tensorReduce1_h2f,0,0 -tensorMap11,39.247,244872 -tensorMap11_f2h,0,0 -tensorMap11_h2f,0,0 -Conv7,1792.85,1.07184e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Conv8,1012.73,5.58673e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Conv9,1045.6,5.94124e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -tensorMap21,83.6212,481961 -tensorMap21_f2h,0,0 -tensorMap21_h2f,0,0 -tensorReduce2,32.2344,206842 -tensorReduce2_f2h,0,0 -tensorReduce2_h2f,0,0 -tensorReduce3,11.1349,73456 -tensorReduce3_f2h,0,0 -tensorReduce3_h2f,0,0 -tensorMap22,69.2339,398246 -tensorMap22_f2h,0,0 -tensorMap22_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_120.txt deleted file mode 100644 index ff28fccb9e6a9464df2a3ecf2e0f2f450508f710..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,337.4,3.68478e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.237007,15382.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.184344,15371.1 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.68,56288.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,495.317,5.5504e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231612,16226.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.183714,16224.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.68191,47875.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,180.324,2.10248e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.20503,16949.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.108639,16953.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,262.307,3.23845e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 
-Add4,0.219605,17250.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.184172,17220.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,184.059,2.2692e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.218521,17340.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156735,17319.5 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.96304,34615.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.5155,23479.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.161062,17309.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.16766,82622.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_151.txt deleted file mode 100644 index bae151beca45f4cfd85d2909d09707078928d782..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,190.193,2.05276e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.224536,14885.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.174028,14864.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.67015,56078.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,227.773,2.5145e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.215689,15782.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.167237,15790.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.83595,49012.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,105.987,1.20833e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207775,16320.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.15142,16319.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,131.067,1.56614e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212713,16587.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.152012,16572.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,96.4066,1.14964e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208921,16684.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.153449,16688.5 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.84486,33369.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.49149,21018.3 -Mul1_f2h,0,0 
-Mul1_h2f,0,0 -Add6,0.174572,16683 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.27003,81690.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_152.txt deleted file mode 100644 index a6d85b7e69a0a8d6017a20fae3eae2a872189fd8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,192.983,2.08398e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.228424,14967.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.178303,14969.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.74804,59087.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,229.723,2.54853e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219602,15849.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176313,15847 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.95143,49174.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,106.015,1.21477e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.217896,16375.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.162725,16361.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,132.317,1.56904e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.215007,16595.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.15861,16580.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,97.0358,1.15623e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.211979,16686.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.159823,16686.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.02821,33371.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.55131,20448 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.221237,17272.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.25398,80835.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_153.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_153.txt deleted file mode 100644 index efc436221c5a779f64b4c606abce1c3c410a66a7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,178.417,1.90236e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.226863,14723.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.178841,14731.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.61066,54457.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,226.111,2.46688e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.218232,15622.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.175753,15628 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.81594,47730.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,105.093,1.17798e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208955,16155.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.156338,16157.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,130.75,1.54768e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.206699,16418.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.155398,16405.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,96.3056,1.13727e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.212475,16512.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.153935,16485.7 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.83618,32979 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.52322,19941.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.182194,16489.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.29497,80765.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_154.txt deleted file mode 100644 index 00f06e6d161b2f4b4fc21a7bc9faaceb09188007..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_154.txt +++ 
/dev/null @@ -1,63 +0,0 @@ -Conv1,179.945,1.92348e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.225087,14961.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180134,14929.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.66463,55779.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,225.428,2.49274e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214981,15791.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176168,15803.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.7666,47423.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,105.113,1.19446e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210402,16298.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.154527,16294.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,130.745,1.5617e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213,16525.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.15965,16531.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,96.5646,1.13679e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.222831,16602.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.158901,16594.5 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.87082,32332.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.50962,19449.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.241307,17164.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.38585,82974.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_155.txt deleted file mode 100644 index 5fd4c898cb169b014a03f0fde202e0ca6199b0a7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,225.41,2.4281e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.22541,14922.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.176754,14929.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.75537,55816 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,319.68,3.55718e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.222315,16190 -Add2_f2h,0,0 -Add2_h2f,0,0 
-Tanh2,0.177609,16157.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.88735,49319.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,128.273,1.48664e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210946,16714.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.155471,16712.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,190.818,2.30027e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.215429,16672 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.154931,16660.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,126.181,1.50897e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.211749,16716 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.153986,16695.1 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.89296,33399.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.53906,20586.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.149202,16712.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.26271,81001.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_156.txt deleted file mode 100644 index bbc1088bcd8bf1c07de1df981e03cd6095171473..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,224.225,2.43331e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.221042,15080.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.171714,15086.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.85658,58264 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,332.686,3.76477e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219249,16390.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.177142,16384.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.94905,52366.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,126.868,1.48313e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209633,16919.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.158117,16915.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,188.538,2.29799e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212866,16876 
-Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.159141,16860.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,124.236,1.50017e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205157,16906.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.1519,16899 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.88824,32924.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.47725,20776.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.158312,16901 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.36032,83468 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_157.txt deleted file mode 100644 index cec1b029906bdced778e41c051734df7e8b978dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,228.935,2.55515e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.236312,15404.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.198546,15397.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.53177,56761.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,333.316,3.83079e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.228549,16594.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.173087,16566.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.9573,51313.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,140.057,1.64818e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.220965,17157 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.158402,17149.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.221,2.31232e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.220901,17351.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.16015,17353.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,132.938,1.64365e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.224849,17416.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156607,17412.7 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.90251,34808.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.50488,21410.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 
-Add6,0.184002,17410.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.37759,85043.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_158.txt deleted file mode 100644 index 65dd44383241454da0cd35e1684a2d6dadb29c8d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,215.378,2.27857e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.217918,14646.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.178741,14652.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.9282,57868.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,316.597,3.45691e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214648,15928.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.172655,15926.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.01175,49420.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,127.865,1.46021e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213186,16448.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.15798,16429.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,192.923,2.28677e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210629,16387.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.157673,16397.2 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,126.591,1.46955e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.212421,16428.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.1622,16430 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.95539,32026.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.56776,20221.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.179224,16435.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.40989,81173.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_159.txt 
deleted file mode 100644 index bcad05006187854202d93e2e3daf47381f0c18e8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,214.73,2.27431e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.234056,14695.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180958,14716.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.59181,58061.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,328.568,3.60365e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.218437,16028.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.181429,16020.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.89355,48878.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,128.364,1.46049e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209243,16527 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.168834,16513.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,192.55,2.29348e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213163,16511.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.158575,16488.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,126.548,1.47275e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.211439,16494.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.170008,16481.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.8865,32958.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.52285,20251.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.189496,16481.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.2397,78864.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_160.txt deleted file mode 100644 index fa9616a04c7532529b9ee6e83a8e38dab29bbee2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,225.508,2.43633e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.228946,15126.1 
-Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.208084,15116.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.87676,58101 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,329.712,3.7038e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22812,16265.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.171499,16277.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.1347,53740.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,140.863,1.64544e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215406,16803.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.156818,16782.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,186.483,2.28163e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217285,17000.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.157375,16977.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,133.798,1.62205e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.210635,17017.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156533,17009.9 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.22091,33981.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.59925,20940.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.164171,17017.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.21735,81657.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_161.txt deleted file mode 100644 index ee0b75a4e8e8db75946bf0daa98bd8967e737ea2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,235.923,2.61302e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.230625,15330.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.18556,15318.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.80264,59670.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,342.963,3.89751e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22461,16497.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.169845,16488.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.94556,50348 -Pool2_f2h,0,0 -Pool2_h2f,0,0 
-Conv3,140.457,1.66547e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21917,17071.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.157157,17065.7 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.744,2.3113e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.221905,17271.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.161519,17248.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,133.716,1.64908e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.217829,17326.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.157448,17322.5 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.01388,34639.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.54772,21361.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.159468,17303.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.33729,84475.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_162.txt deleted file mode 100644 index 37bce90b936e778afa83970ba465cf3ba11f8d1d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,240.706,2.69778e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.230961,15507 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.17644,15512.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.5114,57017.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,342.453,3.94576e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214651,16661.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.174081,16650.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.93383,52551.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,139.38,1.65061e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211301,17204.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.15572,17200.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,184.095,2.30956e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217528,17414 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.161151,17416.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,132.362,1.64178e+06 
-Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.21267,17474 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156981,17466.4 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.85461,34934.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.48784,21484.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.17036,17458.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.28267,84640.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_163.txt deleted file mode 100644 index 33ad0446b56d9654e14069157364a799cadc1984..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,235.701,2.59326e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.231121,15368.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.176635,15372.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.83085,59848.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,342.133,3.92817e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219285,16562.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.173977,16559.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.01668,52238.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,140.035,1.66625e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211371,17128 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.153938,17116.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.731,2.32171e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21819,17312.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.166744,17316.2 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,133.464,1.65251e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.211656,17390.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.155691,17375.1 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.92438,34744.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.53433,20733.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.207566,17367.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.29477,85021.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff 
--git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_164.txt deleted file mode 100644 index 5f6fe09a6522c5d1824453f0e4ed3725d902328a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,236.787,2.61292e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.231442,15246.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.179746,15238.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.85249,60955.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,344.584,3.88198e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22509,16425.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.171592,16414.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.00496,51666.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,140.511,1.65112e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.222171,16977.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.154984,16981.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.223,2.29166e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217253,17197.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.155346,17178.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,133.207,1.62981e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.21491,17272.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.154895,17265.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.02137,34521.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.5765,21294.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.158268,17267.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.23312,83639.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_165.txt deleted file mode 100644 index 2de11a32b2e083f701dee93a6685a4c9102b174b..0000000000000000000000000000000000000000 
--- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,225.813,2.45634e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.233342,15120.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180479,15120.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.91967,59642.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,341.688,3.82968e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.224363,16277.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.18157,16264 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.14589,52613.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,139.652,1.62184e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21387,16796.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.157282,16792.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.498,2.27214e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217282,17019.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.159055,17003.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,132.844,1.61159e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.213957,17076.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.164101,17055.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.00697,34112.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.59257,21871.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.187244,17034.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.17514,80647.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_166.txt deleted file mode 100644 index a3e6094c9516db9c96c9563ba1c9cb760f249040..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,227.965,2.49644e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.233832,15110.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.183643,15089.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.71041,58809.6 -Pool1_f2h,0,0 
-Pool1_h2f,0,0 -Conv2,338.785,3.79768e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.224769,16292.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.177023,16275.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.87674,49693.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,139.793,1.63383e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216245,16849.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.153941,16843.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.055,2.27607e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.221573,17061.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.161151,17063.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,132.606,1.61497e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.216773,17149.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156645,17137.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.88502,34245.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.53558,21072.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.190939,17103.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.29871,82918.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_167.txt deleted file mode 100644 index 635233830164a2bdbced47023dde8785da3fe82d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,229.121,2.46673e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.237256,14986.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180729,14979.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.91751,59875 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,338.076,3.76931e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.226971,16207.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.178975,16190.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.16684,53615.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,140.639,1.62088e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216751,16727.6 -Add3_f2h,0,0 -Add3_h2f,0,0 
-Tanh3,0.158767,16737.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,185.833,2.2697e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.423038,18195.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.164844,16940.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,133.264,1.61332e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.218312,16998.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160332,16998.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.05673,34000.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.54658,20967.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.178684,17000 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.39485,83929.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_168.txt deleted file mode 100644 index 4939acc2b9f1b971a3aa299826af2d177df97fe1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,221.258,2.3963e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.231151,15147.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180821,15134.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.89041,59736.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,339.061,3.79971e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221039,16271.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.177874,16271.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.05763,51343.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,137.873,1.60014e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215893,16805.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.158648,16800 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,184.004,2.25579e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21884,17009.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.159909,17011.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,131.748,1.60159e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.216965,17066.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160671,17051.4 
-Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.9999,34102.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.55789,20987.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.165359,17047.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.33899,83291.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_261.txt deleted file mode 100644 index d6b5d9b3b2d94898e42aa36ac620cf34d0b76c38..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,193.96,2.08465e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.223755,14883.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.181375,14887.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,5.19233,57181.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,263.654,2.89224e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.206002,15862 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.181723,15858.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.80138,47581.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,125.379,1.44447e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.201644,16346.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.156815,16342.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,180.376,2.12976e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209685,16576.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.158718,16549.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,127.509,1.53368e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206152,16654.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.155881,16631.7 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.80793,33276.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.49461,20365.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.188994,16641.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.29749,80568.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_262.txt deleted file mode 100644 index 114fd5b4019f70bc14f1e14d97de5daa68f1113e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,199.126,2.1474e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.224386,14893.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.182623,14902.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.89175,58873.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,267.198,2.93259e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217196,15841.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.177897,15843 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.99445,49056.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,126.885,1.46094e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207109,16293.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.160601,16303 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,182.224,2.17613e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210142,16508.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.158648,16504.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,129.398,1.54729e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205438,16578.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.156853,16565.3 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.98616,33117.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.55359,20260.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.191097,16559.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.27832,80997.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_263.txt deleted file mode 100644 index 38f5c465aeb55bedf12d25cb499b585b13788821..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,237.756,2.58801e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.211435,15177.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.177055,15162 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.77029,55082.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,325.669,3.62553e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210341,16274.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.174693,16276.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.98304,50490.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,166.14,1.95866e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203864,16771.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.156665,16773.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,253.933,3.09534e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209547,16969.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.160223,16948.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,176.456,2.16171e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.207643,16997.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.155656,16996.9 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.97852,33971.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.53225,21108.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.145474,16990 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.29311,81391.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_264.txt deleted file mode 100644 index dce8d97e08c0aa466e8cb19d451ab46e8ee4ae0c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,235.998,2.58229e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.220895,15223 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.182578,15202.1 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.71929,59216.6 -Pool1_f2h,0,0 
-Pool1_h2f,0,0 -Conv2,325.495,3.59715e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.213483,16263.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.175394,16267.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.89997,48840.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,166.348,1.94807e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.202837,16815.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.151289,16789 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,253.407,3.0948e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.205586,16987.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.156341,16989.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,176.472,2.15066e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205435,16995.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.159058,16989.5 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.90611,33983 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.55878,21071.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.197848,16995.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.32883,82994.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_265.txt deleted file mode 100644 index b552df392282519064dc22347d342308a906c3a4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,236.262,2.55492e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.213294,15194.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.172805,15192.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.63543,58406.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,323.781,3.60521e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.209067,16285.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.172606,16264.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.87703,49669.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,165.715,1.94638e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203263,16813.7 -Add3_f2h,0,0 -Add3_h2f,0,0 
-Tanh3,0.156082,16817.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,253.168,3.09459e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210456,16974.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.151186,16970.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,175.698,2.1386e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.203525,17033.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.153666,17016.4 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.89708,34036.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.58343,21124.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.286414,17699.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.32412,82374.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_266.txt deleted file mode 100644 index 22b32b349ffcf83de0782941f16da54fff293c4b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,260.084,2.82767e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.21558,15131.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.173804,15140.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,5.15458,60615.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,360.348,4.0013e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.211848,16307.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.176095,16302.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.2723,53531.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,162.996,1.91715e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.206091,16838.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.158556,16827.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,239.795,2.92844e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209947,16970 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.159752,16962.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,167.049,2.0305e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206168,17022.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.158207,17011.3 
-Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.10691,34018.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.61344,21134 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.149477,17000.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.2582,82237.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_267.txt deleted file mode 100644 index 078360ac09302ce001128a1ea1df997e000ad785..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,252.535,2.80306e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.217663,15443.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.173861,15437.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,4.64457,58441 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,353.487,3.9959e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.211019,16588.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.174386,16588.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,3.83916,50282 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,160.342,1.90628e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207455,17071.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.159116,17073.6 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,235.642,2.91771e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.207208,17231.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.15366,17212.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,163.899,2.026e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.2098,17251.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.159298,17246.2 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.88925,34482.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.50502,21389.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.187544,17253.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.44141,85096.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_268.txt deleted file mode 100644 index 7ef17faf3c5c17e5bc7738f82cd5a261a17345ce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,260.445,2.83985e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.216651,15159.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.179538,15152.1 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,5.12406,59811.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,359.9,3.99244e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.211919,16330.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.184728,16334.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.28685,56435.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,163.118,1.91288e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208735,16854.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.165135,16848.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,239.929,2.92493e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209388,16985.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.162031,16976 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,166.986,2.0313e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205096,17001.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.160009,16994.1 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.10721,33995.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.58976,21106.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.155829,16999.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.24343,82294.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_269.txt deleted file mode 100644 index d149a94dd81196c2a6f9006661408e2a48125585..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,257.784,2.8414e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.218043,15340.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.180108,15340.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,5.07471,61314.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,357.124,4.0032e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.205896,16518.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.173925,16516.7 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,4.18427,52019.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,161.585,1.88286e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210111,17031.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.158485,17026 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,237.55,2.92622e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.206741,17144.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.157791,17134.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,165.515,2.03034e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.209352,17161.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.159407,17163.9 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,1.97624,34313.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,1.54299,21315.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.159292,17162.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,6.30759,83087.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp32_perf_fp32_120.txt deleted file mode 100644 index 25cd9de0d053a2797e8942f0ccc84be5ff9766c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_cifar10/alexnet_cifar10_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,63 +0,0 @@ -Conv1,575.774,5.82108e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.232444,14477.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Tanh1,0.207302,14456.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Pool1,12.7919,128991 -Pool1_f2h,0,0 
-Pool1_h2f,0,0 -Conv2,1017.01,1.00097e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223817,14906 -Add2_f2h,0,0 -Add2_h2f,0,0 -Tanh2,0.132729,14921.2 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Pool2,8.00501,91648.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,287.023,3.18789e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21968,16490.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.14631,16490.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Conv4,396.7,4.79884e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.22079,17588.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.176054,17577 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Conv5,289.129,3.55552e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.211507,17881.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Tanh5,0.130201,17874.8 -Tanh5_f2h,0,0 -Tanh5_h2f,0,0 -Pool3,2.80131,41084.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,0.909982,22973.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,0.142185,17870.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Softmax1,7.7959,102068 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_120.txt deleted file mode 100644 index 391282ad11099343f49a83189f68acbb8bd1942d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,389.264,3.32015e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.483012,24306.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.370173,24322.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.00216,49953.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,519.829,4.84468e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.441751,27734.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.368817,27753.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.49479,55560 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,187.256,1.86581e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.429559,29112.8 -Add3_f2h,0,0 -Add3_h2f,0,0 
-Relu3,0.341534,29112.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,260.793,2.80254e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.428932,29992.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.339095,29950.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,181.954,1.97205e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.417661,30502.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.337995,30509 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.74986,32041.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.70647,83156.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.92935,44395.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.270181,30508.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.68727,51425.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.0127,36299.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.277624,30641.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.41295,35288.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.43176,31835.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,38.5284,446826 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_151.txt deleted file mode 100644 index 7b36fde13801276c18018adc16ef4890b4eb4890..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,249.629,1.96776e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.492637,21956.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.396279,21952.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.75587,45092.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,257.644,2.14873e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.439127,24433.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.373354,24445.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.29218,48932.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,136.118,1.22967e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.409822,25675.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.33148,25663.8 -Relu3_f2h,0,0 
-Relu3_h2f,0,0 -Conv4,149.576,1.44381e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.394558,26700.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.337138,26719.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,110.543,1.09229e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.385643,27381.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.335415,27397 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.60113,27404.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.17822,69631 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.86901,38001 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.276433,27424 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.59152,43516.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.06847,30872 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.249387,26073.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.32902,30994.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.274226,27134.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.8362,409193 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_152.txt deleted file mode 100644 index dc32d18e93ee5ec80757389dfd4bf1a7e96cef0d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,249.51,1.99001e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.515593,22258 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.383677,22246.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.72443,45690.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,267.601,2.27311e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.453923,24781.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.381284,24816 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.27455,49654.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,139.903,1.27269e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.416875,26016.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.341041,26020.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,158.912,1.54462e+06 
-Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.428375,27092.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.34789,27065.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,117.123,1.16661e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.405021,27677 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.339722,27677 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.80161,30316.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.25399,70292.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.82906,38380.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.283857,27738 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.73987,46457 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.978144,32720.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.273834,27910.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.41574,31870.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.456221,28963.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,36.5775,406615 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_153.txt deleted file mode 100644 index 5838b4148e0e28bbabd3298535c42729010bc73a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,238.617,1.86414e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.656252,21677.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.429636,21685.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.13473,46761.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,251.066,2.09823e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.480643,24174.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.39256,24189.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.665,48410.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,135.146,1.2094e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.407645,25429.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.33402,25418 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,150.325,1.43901e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.412893,26413.6 
-Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.335717,26432.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,111.506,1.0936e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.394468,27043.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.334455,27054.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.84151,32371.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.15433,68457.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.8628,37471.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.275672,27115.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.7518,44288.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.01497,31971.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.218929,27275.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.23204,31135.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.299947,28339.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,36.6818,395989 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_154.txt deleted file mode 100644 index e285249a3496789ec0030d202e4705dde7cf6cbf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,239.504,1.9089e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.650818,22087 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.299569,22075.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.39131,49930.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,255.645,2.17619e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.475823,24601.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.397444,24620.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.71863,49305.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,138.815,1.25627e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.411556,25868.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.325137,25884 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,158.938,1.53668e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.433379,26864.4 -Add4_f2h,0,0 -Add4_h2f,0,0 
-Relu4,0.340388,26876 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,116.678,1.14687e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.417168,27485.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.334533,27493.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.78091,31746.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.30965,69865 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.90527,39089.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.283275,27570 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.8039,45151 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.0954,33544 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.285861,27719.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.41655,31690.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.380522,28765.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,36.7626,404634 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_155.txt deleted file mode 100644 index 5e943f57f9907e6da03e34e63553c2f844ecbccc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,276.643,2.24322e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.479946,22664.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.394717,22641.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.99556,47675.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,315.117,2.73945e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.431011,25580.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.386417,25577 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.63967,52256.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,143.233,1.33647e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.419882,26834.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.341028,26838.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,166.044,1.65032e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.415991,27803.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.343544,27800.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 
-Conv5,122.109,1.20788e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.408822,28445.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.342884,28456.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.75296,29929.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.25413,73012.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.87899,39619.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.285656,28488 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.66321,46903 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.06091,33666.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.270271,28558.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.45742,32708 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.432266,29718 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.0684,418536 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_156.txt deleted file mode 100644 index 05d5be47e55158369efbdde8f14412a2b4ba1c5d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,288.752,2.36707e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.48253,22860.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.400331,22856.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.66718,45729 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,321.833,2.80938e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.434877,25743 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.367383,25746.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.47794,51540.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,147.136,1.38368e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.41173,27110.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.343978,27107.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,175.018,1.76817e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.412656,28152.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.335921,28167.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,129.575,1.32628e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,0.41183,28804 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.334725,28788.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.76962,33243.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.32825,74271.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.94442,41180.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.278149,28848.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.75426,47543.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.02963,34074.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.276062,28937 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.4788,33152 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.453994,30107.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.0387,421197 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_157.txt deleted file mode 100644 index 64684f24133f6f23242ceb1029b6c6dacf73755e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,287.849,2.34643e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.49487,22794.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.395504,22790.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.06029,46818.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,318.686,2.78154e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.436592,25620.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.371281,25635 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.65472,51293.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,147.071,1.37849e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.433847,26983.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.339428,26995.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,174.831,1.76004e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.430601,28068 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.342129,28068.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,129.317,1.32483e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.419414,28668.2 -Add5_f2h,0,0 
-Add5_h2f,0,0 -Relu5,0.338391,28691.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.90188,31434.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.3576,75277 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.83406,40071.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.283825,28717 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.75734,47488.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.01963,33994.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.274066,28845 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.44692,33041.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.424464,29997 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.4332,418065 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_158.txt deleted file mode 100644 index 905a3f5e14a3c5ec89e252b13b933e1ffdb4e013..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,270.088,2.18654e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.490774,22505.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.399998,22517.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.98629,45042.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,314.854,2.71404e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.451389,25364.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.386513,25341.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.99894,53387.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,142.604,1.31369e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.433437,26636 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.342642,25282.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,165.705,1.63866e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.413342,27617 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.339486,27605.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,121.446,1.22417e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.413795,28278.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.339543,28294 -Relu5_f2h,0,0 
-Relu5_h2f,0,0 -Pool3,2.78271,31197.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.3198,72896.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.91834,40430.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.293348,28305.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.69246,46658.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.04285,33435.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.284421,28385.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.48066,32561.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.443658,29560.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.0832,414872 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_159.txt deleted file mode 100644 index 32c740c4ce5a6ff9384826b23606df002e1f646d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,279.982,2.26971e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.481142,22551 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.373597,22543.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.8545,46030.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,313.554,2.70775e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.437494,25421.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.394635,25394 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.40226,50879.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,145.959,1.3586e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.415294,26721.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.33779,26740.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,174.225,1.73974e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.425034,27794.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.33946,27809.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,127.499,1.29883e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.422186,28431.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.340958,28431.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.84849,34241.2 
-Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.30764,73589.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.94725,40734.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.285675,28477.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.72433,47070.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.02435,33728.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.278507,28597 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.45965,32815.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.423805,29740.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1753,416022 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_160.txt deleted file mode 100644 index 87094f1c2cac516168b0d16205249a4ae613d980..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,278.582,2.26112e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.478787,22581.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.385738,22528.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.6595,45160 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,314.579,2.71203e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.437591,25478.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.384951,25501 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.46229,51024.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,145.181,1.35071e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.450039,26805.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.341073,26817.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,173.306,1.72907e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.42312,27889.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.345725,27897.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,127.616,1.29542e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.413476,28473.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.33898,28473.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.84259,32887.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Mul1,7.32521,73432.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.90936,40742.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.282967,28515.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.73178,47054.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.01908,33731 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.279268,28668 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.44412,32770.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.432491,29714.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1026,416622 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_161.txt deleted file mode 100644 index 3a56a4bf675a3bf9948d0ef29aca0f7002725cb1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,304.167,2.52881e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.493008,23243 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.369675,23215.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.0829,48876.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,356.731,3.18077e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.433879,26472.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.391038,26438.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.9814,54261 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,146.87,1.40829e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.410423,27754.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.33619,27758.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,174.111,1.79333e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.412714,28703.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.339773,28695.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,128.736,1.34597e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.406551,29302 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.34323,29298 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.83054,33775.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.32489,75808.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 
-Add6,1.95344,41951.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.27694,27830 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.66519,48503.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.02264,34706.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.28323,29449.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.47741,33744.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.427056,30620.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.3337,429606 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_162.txt deleted file mode 100644 index da47c26710f5b8303f9535b705bf97daad8ab2f5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,304.004,2.6168e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.539228,23990.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.401866,23998.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.68873,47069.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,359.454,3.24672e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.442576,27015.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.400215,27000.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.44952,54042.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,207.715,2.04096e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.428535,28615.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.339985,28602.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,258.236,2.76714e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.431414,29939.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.345457,29932.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,184.405,2.0265e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.421232,30677.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.339416,30684.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.81525,36810.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,8.01571,86814.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.88103,44284.6 -Add6_f2h,0,0 
-Add6_h2f,0,0 -Relu6,0.288837,30681 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,5.04182,53482.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.999456,36414.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.280305,30817.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.52861,36528.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.379134,30897.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1339,443636 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_163.txt deleted file mode 100644 index 5ceaa1add158ee879b8884d085de3430cd1efcc2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,304.407,2.62039e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.49064,23937 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.38202,23940.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.76337,47931.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,356.786,3.26015e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.434896,26972.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.376894,26999.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.44882,52672.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,207.584,2.07129e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.41841,28550.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.334371,28542 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,257.275,2.75946e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.417578,29920.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.333657,29916.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,183.961,1.97748e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.418078,30695.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.34346,30676.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.73845,36752.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.40277,82432 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.88325,44330.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.273969,30680.6 
-Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.65854,51315.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.02292,36448.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.269804,30768 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.41758,35334.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.413661,31958.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1711,444967 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_164.txt deleted file mode 100644 index d5d4542dee5358e1de3d286cf1ad212cffeeced7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,308.637,2.64942e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.474793,24067.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.377118,24075.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.90753,48177.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,378.192,3.47264e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.435856,27087.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.377188,27084 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.78108,56999 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,207.571,2.06591e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.428707,28731 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.339512,28734.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,258.155,2.77741e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.428867,29970.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.346398,29981.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,183.849,2.02864e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.429629,30730.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.345482,30737.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.71801,33888.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.37724,82581 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.89893,44468.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.282481,30734.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.70085,51423 
-Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.946164,36590 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.281477,30848.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.4291,35522 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.424144,32133.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.4098,447703 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_165.txt deleted file mode 100644 index adeba7a2fea31bc1224d6dab7d5d76fa79bf1a96..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,297.534,2.43996e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.481245,22898.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.397425,22875.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.61146,46951.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,355.533,3.1295e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.438102,26065.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.37743,26088.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.47939,52203 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,145.64,1.37532e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.419159,27365.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.338443,27381.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,173.563,1.76793e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.453616,28396.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.345111,28403.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,127.973,1.3209e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.429303,28982.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.353804,28985.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.80588,33447.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.96746,80240.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.94574,41473.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.28414,28989.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.94779,51072 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.976051,34345.2 
-Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.277368,29146 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.51731,34456 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.363063,29203.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1203,424598 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_166.txt deleted file mode 100644 index 90a5a9bb27c1bebca6314875fa69514039e4080f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,297.484,2.52426e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.476068,23653.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.36666,23665.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.78486,48609.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,353.99,3.20521e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.449981,26710.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.364733,26714 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.53371,53478 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,206.6,2.04905e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.419152,28294.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.330603,28286.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,257.577,2.73938e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.419702,29649.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.336337,29603.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,184.479,2.01094e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.419377,30448.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.336491,30425 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.82156,38140.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.39604,81535 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.87415,42816 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.274495,30436.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.78339,50877 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.06535,36140.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.272036,30501.8 
-Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.51078,35071.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.42547,31745.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.1993,440895 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_167.txt deleted file mode 100644 index c3e83a0ac7288f9df2261d3bfc2eed5e5877e3ed..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,296.562,2.54573e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.483562,23826.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.389066,23837.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.87391,46458.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,352.899,3.1982e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.458154,26897 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.379659,26893.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.77367,53820.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,206.292,2.04068e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.430826,28503.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.346884,28484.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,256.01,2.73599e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.432797,29775.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.335576,29790.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,184.389,2.0146e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.428176,30532 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.335723,30501.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.66343,32065.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.37068,81886.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.88047,44132.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.293502,30505.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.62939,51094.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.06439,36277.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.276958,30669.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 
-Mul3,1.42183,35266.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.508649,31890.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.754,450476 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_168.txt deleted file mode 100644 index 87a8b5b753cca9b81ea1f28369712055c2902022..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,298.737,2.56173e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.487766,23787.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.394545,23772.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.6904,47590.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,369.368,3.34402e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.443563,26817.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.376375,26837 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.54972,53705 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,205.062,2.02188e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.423229,28422.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.353867,28418.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,256.725,2.72743e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.440644,29779.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.349207,29725.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,182.983,1.99774e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.422934,30528.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.343504,30547.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.81525,36790.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.38699,81530.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.90984,44390.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.30305,30577.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.77261,51388 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.08207,36369 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.282924,30616.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.45893,35281.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 
-Add8,0.450915,31905.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.0269,442478 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_261.txt deleted file mode 100644 index 93838593b0e86eb9947bc0286ffada8c37d65bad..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,239.159,1.8778e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.500221,21907 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.425374,21899.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.93152,46327.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,319.478,2.69472e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.409328,24677.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.389622,24673.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.46425,49392.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,141.826,1.27388e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.383255,25669.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.337892,25673.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,189.207,1.81101e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.403869,26546.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.344977,26527.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,133.674,1.30657e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.385706,27047.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.341623,27047.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.67763,28436 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.2637,69883.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.90429,38818.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.281375,27105.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.6535,44900.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.06299,32098.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.280145,27204.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.46036,31217.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.462704,28331 -Add8_f2h,0,0 
-Add8_h2f,0,0 -Softmax1,37.1553,401965 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_262.txt deleted file mode 100644 index 14eb5aebdc866d3346d6865edc4dc056735a0042..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,240.369,1.8844e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.47601,21922.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.407965,21938.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.92321,44451.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,319.346,2.68915e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.40577,24650.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.367959,24658.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.37474,49381.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,141.115,1.27234e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.383088,25688.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.340407,25688.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,188.159,1.79488e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.399665,26542.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.347332,26550.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,133.456,1.30303e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.383908,27082.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.331986,27059.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.67921,27059.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.26349,69737.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.86217,37769.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.285144,27090.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.73779,44848 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.04095,32091.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.271524,27231.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.4478,31233.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.396132,28284.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.4397,403698 
-Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_263.txt deleted file mode 100644 index 4d7d8bd0660e808ab0e16759a768bd1294db0947..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,283.479,2.34161e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.461636,23092 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.373643,23084.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.93579,47439.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,379.707,3.37608e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.417068,26186.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.394808,26185.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.92988,53769.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,177.266,1.69806e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.403147,27381.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.343039,27404.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,256.526,2.61356e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.411801,28373.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.339218,28351 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,178.12,1.84321e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.404037,28901 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.347307,28877.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.69885,30353.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.35126,76658 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.9629,41851 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.277427,28908.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.6767,48488.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.96418,34421 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.277983,29073.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.45029,33452 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.423179,30259 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.5578,426495 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_264.txt deleted file mode 100644 index ccb144aca9ef38452537d71bd0dd800618d4fe51..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,283.059,2.33068e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.461873,23114.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.381311,23118.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.78863,47515.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,379.461,3.38573e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.430194,26185.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.383231,26212.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.5791,53831.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,177.436,1.69892e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.402565,27384.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.339647,27381.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,257.399,2.62307e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.43002,28343.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.34787,28331.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,178.481,1.85985e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.408601,28912.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.33639,28893.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.79199,31830.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.87687,80616.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.9147,41782.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.28384,28931.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,5.02516,50517.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.0002,34359.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.282002,29023.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.55902,34428.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.365131,29122.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.2455,423237 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_265.txt deleted file mode 100644 index 8af78c59c5e7b00d25afbb5d78ef5b95709e043c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,283.607,2.34483e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.452094,23164.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.377304,23179.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.8533,46390 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,380.062,3.38763e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.430624,26235 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.378015,26253.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.61293,52555.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,177.731,1.70178e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.400748,27396.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.357381,27412 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,257.179,2.61661e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.412037,28351 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.345541,28351 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,178.761,1.86393e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.398309,28962.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.35326,28966.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.75246,33364.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.30033,77764.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.86243,40825.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.292518,28973.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.82387,48493.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.0437,34459.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.280441,29096.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.47374,33594 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.437816,30351.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.0961,422772 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_266.txt deleted file mode 100644 index d0e6e9a17ef4b3dfb59b8f9974ae1d8f215aae00..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,309.155,2.57416e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.442635,23297.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.388606,23309 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.81827,47869.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,413.862,3.71366e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.427103,26663 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.389618,26678.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.86912,53372.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,171.531,1.64968e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.416357,27856.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.353023,27849 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,240.222,2.46913e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.418022,28696.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.355481,28696.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,167.655,1.75969e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.413458,29210.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.352472,29165.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.78953,35131.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.97524,82807.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.90814,42220.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.304992,29205.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.97367,51161.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.00421,34737.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.286917,29374.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.54951,34882 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.376614,29461.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.423,424489 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_267.txt deleted file mode 100644 index 78ebfbbbb1ea01fb72513f32b0a08423a0258e98..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,309.406,2.5672e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.434609,23278.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.365573,23290.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.04474,46626 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,414.939,3.71638e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.430028,26594.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.382379,26606.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.81203,53236.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,171.216,1.65335e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.415865,27803.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.343807,27814.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,240.439,2.47009e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.418278,28666.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.337977,28659 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,168.62,1.74734e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.412798,29188 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.347718,29165 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.77458,30668 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.43128,78855.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.89549,42407.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.288448,29208.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.80043,49215.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.07115,34829 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.290028,29376 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.47451,33809.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.4427,30493.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.789,429834 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_268.txt deleted file mode 100644 index 0c2c340b509f603965d439f8617f109f2825e2e1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,309.613,2.57011e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.447346,23316.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.376549,23313 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.80858,46652.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,414.747,3.7171e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.43125,26606 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.385976,26629.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.46542,53344 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,171.413,1.66018e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.427141,27837.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.34727,27810.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,240.804,2.47861e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.421176,28731 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.348403,28734.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,167.967,1.75774e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.411128,29194.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.349682,29206.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.73172,32208 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.55088,79804.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.95532,42358.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.29102,29216.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.78787,50123.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,0.972817,34818.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.293918,29348.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.47703,33829 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.459557,30543.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.5514,428600 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_269.txt deleted file mode 100644 index 16cdb9e385561ba7058ccc6ff9c2a33d56864f28..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,312.658,2.60353e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.447026,23362 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.37452,23369.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.84804,46754.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,414.875,3.72555e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.429964,26648.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.39534,26644.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,4.53306,53328.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,171.436,1.64858e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.414898,27780.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.348965,27795.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,241.172,2.47577e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.426834,28749.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.359078,28726.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,167.408,1.72382e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.410566,29215 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.343347,29180.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,2.7337,33650 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,7.39321,78142.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,1.86438,41160.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.283916,29246.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,4.7498,48955.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.05844,34782 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.278342,29430.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.46566,33851.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.461394,30573.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,37.4792,430827 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp32_perf_fp32_120.txt deleted file mode 100644 index 649a1f51be1545a963b1c6fb717178a837fc701e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/alexnet_imagenet/alexnet_imagenet_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,81 +0,0 @@ -Conv1,650.457,5.73552e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.474304,25907.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.332364,25911.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,8.92677,90320.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv2,1057.96,9.77033e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.453234,28497 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.372524,28497.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool2,8.27002,90575.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv3,286.088,2.90251e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.456562,30895.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.377951,30887.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,414.867,4.62072e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.430777,32026.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.258464,32044.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,274.551,3.07434e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.429369,32951.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.375225,32927.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Pool3,4.34963,65850.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Mul1,16.8142,166920 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add6,3.2153,60129.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.388544,32831.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Mul2,5.96569,71879 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add7,1.62928,45759.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.276691,32854.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Mul3,1.64413,41948.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add8,0.586707,35509.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Softmax1,76.6594,908040 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_120.txt deleted file mode 100644 index ab5123cc67a86fc4a620d0a8bcca343e923258dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,53.2416,410603 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.200686,10812.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.35149,30661.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.16412,10836 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,141.815,1.1076e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212245,11246.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.24734,22510.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.154683,11261.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.798899,12588.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.399194,12067 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.12763,11294.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.19088,11302 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.159733,11343.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.132791,11366.9 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.12994,12188.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_151.txt deleted file mode 100644 index f61512071680cf0864c0c16a8e7b21a8a6aee39c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,48.1242,345225 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.194072,9955.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.02115,23780.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.170353,10001.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,76.2011,563364 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208235,10230.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.09816,20467.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.151428,10246.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.724709,11214.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.400649,10995.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.127191,10307.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.15107,10324.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.120308,10366.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.134987,10383.8 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.00837,10992.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_152.txt deleted file mode 100644 index a3c51be3cffa7c274431e82fad78197aa6d1836f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,48.9399,350989 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.197227,9884.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.11865,27001.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.181156,9936.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,77.7687,570069 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22133,10144.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.04445,20337.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.161111,10183.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.71334,11153.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.396691,10940.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.128478,10231.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.25817,10834 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.159803,10269.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.134676,10292.2 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.0811,10930.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_153.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_153.txt deleted file mode 100644 index 2dd6e9a88a82649c5b2ab418e598432bec261bf6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,46.6348,325536 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195595,9778.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.12681,24863 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.176449,9806.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,77.1562,559152 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210802,10053.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.14929,20128.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.158446,10082.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.692079,11008 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.388636,10829.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.129313,10124.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.18317,10147.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.158939,10166 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.133499,10167.9 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.05805,11298.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_154.txt deleted file mode 100644 index 389e60070dd2731939dde771502b55c5a437486f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,48.0157,339125 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.194162,9707.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.24845,26882.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.16954,9740.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,78.0062,562935 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.207128,9963.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.24529,19938.6 -Pool2_f2h,0,0 
-Pool2_h2f,0,0 -Tanh2,0.153448,9986.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.719999,10933.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.409574,10722.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.132369,10066.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.23904,10084 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.157931,10093.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.140488,10118.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.10729,11706.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_155.txt deleted file mode 100644 index ca03841e4af51b8e30ce93734eda3674e7eca805..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,51.1264,364502 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.204514,9953.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.1248,26736.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.181972,9987.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,89.6018,660985 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219131,10209.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.19727,20441 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.159291,10238.7 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.715628,11189.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.407411,11204.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.130753,10273.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.20173,10298 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.160679,10311.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.140894,10336.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.10275,11773.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_156.txt deleted file mode 100644 index 
1f4c7f682568d43d7e4708052ff22280e1198057..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,52.9157,380778 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203973,10001.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.24634,29547.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.164356,10036.1 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,89.8067,667021 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.224731,10238.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.22599,20477.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.159969,10246.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.719423,11220.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.415769,11277.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.143569,10311.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.21795,10340.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.17548,10341.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.137815,10366.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.09577,12943.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_157.txt deleted file mode 100644 index 4abd2ff1d6abfa5a23c9909030d16f915d6d44c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,52.7686,392792 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.19764,10428.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.05472,27189.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.169716,10457.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,107.438,830153 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214123,10726.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.00577,21072.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.155681,10755.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.798201,12047.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 
-Add3,0.402019,11556.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.123946,10812.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.19996,10820 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.164612,10846.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.127936,10856.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.0511,12358.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_158.txt deleted file mode 100644 index ae604d7bdd9f6e85d7a77e9c39d29259b0bdfdff..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,49.0993,342331 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203544,9711 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.05407,24425.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.179067,9764.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,90.8712,654519 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221912,9948.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.2273,19913.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.159806,9969.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.716977,10902.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.400351,10695.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.134183,10038 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.23569,10095.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.168154,10116.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.143252,10126.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.07097,11713.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_159.txt deleted file mode 100644 index 955744472efb7e677ce14f28f589aa25087e4766..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,50.8444,350982 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.209797,9758.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.27614,28217.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.164961,9789.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,90.4073,655332 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223419,10003.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.27863,20033.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.158462,10049.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.702981,11026.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.410073,10794.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.136487,10114.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.21371,10122.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.133022,10131.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.138929,10133.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.22311,12791.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_160.txt deleted file mode 100644 index 5f142c7920ec552c8b3f8c1a872604b55b6e63b1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,50.7658,368438 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195457,10169.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.35397,30084.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.185678,10215.8 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,106.684,801549 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.213777,10460.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.24265,20941.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.155121,10488.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.849846,11984.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.372124,11263.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.132897,10551.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 
-Mul2,1.22824,10855.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.133111,10606.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.133147,10626.9 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.15523,11644.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_161.txt deleted file mode 100644 index 092b6f5d79acc04e4622f26006ed1a8de3d1ccea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,54.4765,397281 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198766,10384.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.2986,30571.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.157898,10413.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,105.108,811439 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.206196,10702.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.20963,21433.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.153771,10728.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.821688,12009 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.417266,11780.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.128219,10780.3 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.21484,10801.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.168548,10802.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.128519,10837.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.06899,11700.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_162.txt deleted file mode 100644 index 3ca4277de1d0258709f4d44faf64d9e59937a90f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,45 +0,0 @@ 
-Conv1,55.2578,412629 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.201061,10337.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.26118,28870.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.166932,10368.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,107.154,823003 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.211173,10624.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.21668,21291.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.156635,10666.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.828248,11977.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.415174,11726 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.131767,10721.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.2329,10746.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.169524,10763.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.134702,10786.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.0436,12095.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_163.txt deleted file mode 100644 index dbfbf37ebc34a0e18fe7b33d296b0e60811e87d5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,55.6706,411214 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.199019,10259.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.40361,29808.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.162872,10296 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,117.265,891235 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.209499,10559.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.27431,21129.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.155639,10569.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.867055,12131.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.355961,11384.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.129613,10613.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.25566,10645.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.139422,10668.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.135556,10685.9 -Tanh4_f2h,0,0 
-Tanh4_h2f,0,0 -Softmax1,1.03205,12631 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_164.txt deleted file mode 100644 index 99b2446d3c9a86427679948b7869c3ce4cd645c7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,54.555,404489 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195633,10248 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.32372,29348.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.158491,10299.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,116.045,885287 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212567,10548.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.25185,20535.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.154827,10586.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.813368,11876.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.421801,11632.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.132619,10628.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.22149,10645.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.168667,10685.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.140765,10703 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.10036,12345.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_165.txt deleted file mode 100644 index e1c00548bba6f5bf5cd989f354f6855765119752..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,51.8461,378176 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.205409,10146.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.29647,27806 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Tanh1,0.175278,10179.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,107.034,806291 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220113,10469.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.22665,20951 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.156977,10479.3 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.81621,11727.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.401843,11518.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.128913,10538.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.19511,10555.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.174436,10587.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.131604,10599.2 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,0.973702,11235.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_166.txt deleted file mode 100644 index d0c78df48e4b19fbd1da745db9860b75fb3d81c5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,51.845,377220 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.202846,10185.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.31109,28424.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.167067,10213.5 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,106.794,808042 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210497,10525.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.26955,20816.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.157351,10544.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.762453,11546.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.423516,11571.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.135339,10587.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.23369,11207.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.14531,10629.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.139399,10631.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.18922,11353.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_167.txt deleted file mode 100644 index 4582f6b78296b5f0f5db7168da8bc20a7c8b57a8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,51.2702,371429 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.199473,10120.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.31699,29873.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.158638,10168.4 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,113.259,848992 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.2082,10447.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.21405,20910.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.154529,10462.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.818962,11718.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.414665,11511.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.132638,10531.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.17942,10533.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.168603,10544.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.13177,10573 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.13253,11970.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_168.txt deleted file mode 100644 index 55d8d4862bceea092624fef4be804238cafe2d6e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,51.2411,369185 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195915,10109.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.21625,29217.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.164785,10137.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,113.493,848930 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.211281,10412.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.27259,20860 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.160157,10443.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.865858,11955.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.401084,11221.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.132119,9920.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.20541,10189.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.125783,9991.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.134711,10000.9 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.26711,10830.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_261.txt deleted file mode 100644 index ceeccbe71a1ee4bc7891d2898389c43ffaaae102..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,24.6817,175003 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.17307,9823.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.04969,26588.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.122599,9861.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,92.441,682924 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.198039,10233.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.14033,20486.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.161636,10260.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.715854,11188.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.393919,10990.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.127492,10331.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.26433,10350.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.163857,10375 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.137178,10390.3 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,0.975836,10632 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_262.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_262.txt deleted file mode 100644 index ae7893234b4a854afdf2ab6a93804b472d7bde8d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,24.6179,176890 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.166737,9825.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,2.9676,24858.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.11938,9854 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,91.1076,673560 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.191941,10287.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.09591,20588.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.164788,10321.9 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.67771,11241.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.387586,11046.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.13265,10342.8 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.2241,10365.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.165851,10396.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.145143,10421.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.00152,10659.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_263.txt deleted file mode 100644 index bd428223fa0b68dc3eb8d0ec94c2abd03e912b98..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,26.2544,191225 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.170372,10033.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.13531,28341.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.156977,10071.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,110.735,835247 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.196139,10494.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.22919,20410.6 -Pool2_f2h,0,0 
-Pool2_h2f,0,0 -Tanh2,0.15539,10517.1 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.846818,11995.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.367052,11283.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.127601,10572.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.26546,10844.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.124983,10625.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.136359,10627.5 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.03664,11336.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_264.txt deleted file mode 100644 index f6809874580e3592b06fd43812ddf02a388428b2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,27.192,198660 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.170811,10189.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.11118,28018.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.161054,10229.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,109.608,835120 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.194372,10661.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.27415,21780.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.159457,10692 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.815076,12175.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.38781,11445.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.128907,10711.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.19239,10745.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.173306,10782.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.134577,10787.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.073,11933.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_265.txt deleted file mode 100644 index 
525e1c1bceab761098398811fef3d6f67c8084d8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,27.1287,199754 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.177319,10183.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.09268,27524.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.15875,10220.3 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,109.718,837972 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.195335,10667.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.10515,21350 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.15731,10684.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.763332,11923 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.396937,11441.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.12577,10722.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.21391,10741.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.169403,10758.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.131419,10768.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,0.991177,11341.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_266.txt deleted file mode 100644 index a7e4b208e395f44cf0a89ecf6755b04b22a0705e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,28.1301,210769 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.175569,10256.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.25761,30029 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.153194,10300.6 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,119.738,911657 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.192788,10749.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.21573,21510.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.145374,10768.5 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.816036,11993.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.415413,11776.7 
-Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.128081,10810.5 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.26205,10829.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.161025,10856.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.13923,10894.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.00547,11532.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_267.txt deleted file mode 100644 index 3948a143506d5d0eaeabd66c1d60b04fd449f36b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,28.2398,208264 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.175534,10338.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.12943,27848.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.163847,10395.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,118.691,913277 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.197476,10825.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.11307,21657.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.160868,10845 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.809361,12336.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.360213,11630.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.133012,10923.2 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.22352,10932.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.143597,10957.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.133201,10972.6 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.0499,13248.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_268.txt deleted file mode 100644 index 2e3cfc46750f56cafde4c30b1b97d33a7ee5d2a1..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,28.1788,210858 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.176807,10342.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.16951,27912.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.165633,10369.2 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,118.568,907059 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.195265,10820.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.1375,21644 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.155005,10825.8 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.78339,12064.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.405327,11619 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.131358,10881.1 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,1.21585,10925 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.144199,10955.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.13563,10963.1 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.16035,11521.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_269.txt deleted file mode 100644 index 1910111015a911e4af5f0173caeee0686320fee5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,28.5102,202387 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.180833,10300.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,3.22101,28427.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.169076,10328.9 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,118.634,917354 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.198001,10782 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.16212,21577.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.157057,10795.4 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,0.787105,12035.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.407577,11583 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.133399,10844.9 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 
-Mul2,1.22979,10879.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.165399,10896.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.142606,10913.4 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,1.13882,11704.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp32_perf_fp32_120.txt deleted file mode 100644 index df787165b74a3bb97015b4f0bb9c7afa56c5c6d8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/lenet_keras/lenet_keras_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,45 +0,0 @@ -Conv1,67.8081,567302 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.213878,12094.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Pool1,4.51207,41956.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Tanh1,0.210908,12134.7 -Tanh1_f2h,0,0 -Tanh1_h2f,0,0 -Conv2,203.117,1.77821e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.207398,12651.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Pool2,2.85643,30378 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Tanh2,0.178364,12683.6 -Tanh2_f2h,0,0 -Tanh2_h2f,0,0 -Mul1,1.98613,17351.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add3,0.617506,14584.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Tanh3,0.141305,12708.4 -Tanh3_f2h,0,0 -Tanh3_h2f,0,0 -Mul2,0.396533,12723.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add4,0.136624,12725.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Tanh4,0.131891,12770.7 -Tanh4_f2h,0,0 -Tanh4_h2f,0,0 -Softmax1,4.99293,52896.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_120.txt deleted file mode 100644 index 5d79cb1b4bd9d9c23a307f85b2d265712c3672cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_120.txt 
+++ /dev/null @@ -1,255 +0,0 @@ -Conv1,73.9502,645096 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.247835,12623.1 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.164623,12625 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,294.637,2.65058e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.258104,12842.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.169119,12853.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,132.495,1.19912e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.253916,12988.5 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.161241,12980.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,151.749,1.43286e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.255339,13213.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.176837,13213.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,70.4482,661533 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.239563,13236.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.154121,13223.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,324.851,3.12101e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253886,13583.1 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160607,13577.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,96.3067,924399 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.243679,13037.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.155141,13024.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,87.4979,863312 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.243333,13854.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.152028,13848.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,37.7534,377593 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.181286,13825.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.146502,13817.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,191.548,1.93282e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.250984,13866.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.154498,13859.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,52.3825,519540 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.19949,13929.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.150866,13924.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 
-Conv12,51.9763,533162 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.187788,13946.9 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.145609,13939.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.4328,231407 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.162892,13906.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.14126,13907.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,132.226,1.36954e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.236005,14206.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.1487,14183.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,32.2877,327249 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.179397,14208.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.143974,14193.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,132.291,1.39504e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.234907,14439.9 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145369,14432.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,32.1337,336438 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.185467,14453.3 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.143277,14453.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,132.577,1.42292e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.239714,14618.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.147318,14611.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,32.3695,341956 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.184789,14667 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.146847,14651.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,132.132,1.43444e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.236386,14816.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.146639,14801.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,32.1764,345701 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.183279,14812.9 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.151174,14805.3 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,131.917,1.44904e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.236888,14948.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 
-Relu22,0.150729,14933 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,32.167,345983 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.182521,14187.1 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.144009,14940.5 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.8819,728654 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.162079,14775.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135289,14766.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.9825,193420 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.406036,16917.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.103993,14697.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,220.662,2.32068e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.22685,14113.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.14111,14113.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,32.4289,340589 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.186716,14063.5 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.124931,14048.1 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.262235,14036.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00585,21818.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0685114,14032.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.668,147250 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_151.txt deleted file mode 100644 index dc27bd9b0cefcba92a6e611bbd04e7abdcf043e4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,65.389,551425 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.242395,11938.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.162293,11947.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,150.895,1.29446e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.2506,12167.6 -BatchNorm2_f2h,0,0 
-BatchNorm2_h2f,0,0 -Relu2,0.155247,12171.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,123.348,1.09867e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.236335,12533.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.151896,12540.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,78.0618,719279 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.237803,12829.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.154418,12818.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,63.5923,594937 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.219042,12945.5 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.14911,12938 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,164.157,1.55489e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.249452,13138.3 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.156713,13134.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,76.2518,725387 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.236447,13396.3 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.158629,13400.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,44.9147,443219 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.217602,13519.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.149452,13505.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,34.1333,341062 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.191435,13526.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.142422,13526.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,99.2134,991306 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.242485,13607.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.147484,13591.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,41.2043,408958 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.216159,13685.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.143964,13687.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,32.211,331089 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.196456,13729.5 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.134588,13722 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,18.5924,193537 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.181948,13723.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 
-Relu13,0.106428,13716.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,109.057,1.0975e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.220994,13614.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.146076,13607.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.7251,268025 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.185087,13633.5 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.131715,13624.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,109.704,1.10027e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.224808,13509 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.140105,13499.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,26.6311,267766 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.186693,13562.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.122294,13562.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,109.839,1.09124e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.216386,13469.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.141992,13462.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.6122,264636 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.190396,13481.1 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.119471,13479.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,108.943,1.07929e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.216264,13396.9 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.138821,13395 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.6761,266045 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.165205,13427.3 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.125657,13419.7 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,109.453,1.0827e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.221314,13372.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.142541,13349.4 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,26.696,263889 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.159327,13373.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.129939,13373.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.2144,426715 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.139647,13281.1 
-BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.128982,13273.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.3333,132384 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.128908,13269.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0828864,13263.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,141.034,1.34819e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.207995,13055.3 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.135052,13055.3 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.1664,254805 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.14173,13030.6 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.094348,13026.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.245727,13024.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.99098,19884.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0685148,13002 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.2567,137044 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_152.txt deleted file mode 100644 index 8964f1786a46b940e964e341614251865b47d4e2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,65.3376,550172 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.236098,11944.7 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.159887,11946.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,150.596,1.2911e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.259371,12169.5 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.161516,12169.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,123.092,1.08442e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.240299,12538.9 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.152095,12533.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,78.451,723292 -Conv4_f2h,0,0 -Conv4_h2f,0,0 
-BatchNorm4,0.234958,12825.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.15686,12829.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,63.581,596887 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.215909,12951 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.152786,12949.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,164.393,1.56285e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.251448,13130.5 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.163087,13132.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,76.3737,720128 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.233675,13405.9 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.157801,13405.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,45.5004,448326 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.218526,13506.3 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.146137,13508.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,34.1937,339642 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.194184,13506.1 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.14062,13506.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,99.0062,986539 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.23893,13607.7 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.15061,13615.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,41.238,409989 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.211727,13689.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.15165,13697.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,33.4938,341257 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.200818,13716.2 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.136246,13716.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,18.7081,194156 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.183381,13707.1 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.106198,13694 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,109.543,1.10039e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.218457,13575.3 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.142188,13575.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.555,267700 -Conv15_f2h,0,0 -Conv15_h2f,0,0 
-BatchNorm15,0.184959,13628.5 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.124883,13621.1 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,109.565,1.09646e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.212393,13512.9 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.140883,13505.3 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,26.5678,268497 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.185747,13548.9 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.122438,13533.7 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,109.932,1.09239e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.232188,13454 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.143193,13446.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.6109,265493 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.155186,13482.7 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.126921,13482.7 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,108.969,1.08043e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.217931,13385 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.141852,13377.3 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.5757,264539 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.155113,13423.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.122972,13421.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,109.488,1.08123e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.217707,13357 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.145151,13347.5 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,26.63,262356 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.153769,13360.3 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.127503,13360.3 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,42.3842,426475 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.141138,13286.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.127612,13286.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.1488,133664 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.123285,13246.7 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0761851,13246.7 -Relu25_f2h,0,0 -Relu25_h2f,0,0 
-Conv26,139.682,1.35442e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.199247,13059.1 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.139247,13059.1 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,25.6908,251420 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.145958,13032.5 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0989915,13015.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.234261,13015.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.983277,19896.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0692538,12988.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.3946,136283 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_153.txt deleted file mode 100644 index 39c3c4bfc3c45ad536dd32d24c599bbfa1eca85b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,59.192,492599 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.230488,11765.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.162552,11765.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,146.043,1.23587e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.248728,11990.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.160364,11984.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,113.012,984004 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.23555,12316.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.171029,12318.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,77.4188,700262 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.239547,12557.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.158214,12553.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,61.8709,566799 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.215836,12673.7 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.152092,12675.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 
-Conv6,162.923,1.51363e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.245311,12839.9 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.158648,12839.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,74.5823,688044 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.229919,12388.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.152201,12396.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,45.2625,433952 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.217247,13213.5 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.151206,13213.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,34.2165,334105 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.191567,13197.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.143942,13197.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,99.0185,964889 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.239947,13256.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.149024,13251 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,41.4294,388095 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.213621,13344.9 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.148351,13346.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,32.7904,326992 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.198322,13398.5 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.141132,13390.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,19.1902,193601 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.180341,13379.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.114479,13369.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,109.805,1.07841e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.21853,13285.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.143682,13277.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.9344,265827 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.192278,13317.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.125276,13310.1 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,110.593,1.08168e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.213032,13214.9 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.142434,13207.1 -Relu16_f2h,0,0 -Relu16_h2f,0,0 
-Conv17,26.9675,267304 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.187317,13279.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.118931,13270 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,110.601,1.07806e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.216351,13167.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.147916,13167.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.9098,265035 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.154441,13202.1 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.121635,13190.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,109.682,1.06469e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.212578,13116 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.140905,13108.3 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.9461,262889 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.15766,13169.7 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.121433,13167.7 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,110.175,1.06465e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.213787,13087.3 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.142697,13087.3 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,26.9743,263452 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.160233,13110.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.12678,13108.3 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.3574,425833 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.136303,13040 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.128598,13038.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.3003,131934 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.12925,13047.5 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0788378,13032.3 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,140.09,1.3326e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.209426,12877.9 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.13477,12877.9 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.0744,250139 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.137375,12849.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 
-Relu27,0.0982747,12841.7 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.237528,12830.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.966852,19076.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.073513,12807.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.4019,134757 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_154.txt deleted file mode 100644 index af20d986d021bcf29c10ad5a89faede4cf16115b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,59.7437,495554 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.234494,11713.7 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.158329,11715.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,147.27,1.22824e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.251391,11941.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.158031,11943.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,113.406,985062 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.232309,11610 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.150818,12241.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,77.1911,695760 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.239986,12551.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.159061,12549.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,61.5726,562432 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.217922,12654.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.154696,12656.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,162.169,1.49227e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253391,12843.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.159193,12845.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,74.2893,687068 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.234261,13046.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 
-Relu7,0.150271,13046.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,45.0143,431818 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.226866,13199.5 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.149919,13199.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,34.0262,328298 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.194085,13204.8 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.144054,13199.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,98.5223,958944 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.235368,13271.9 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.149695,13266.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,40.8996,394590 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.210469,13381.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.151861,13383.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,33.4273,331360 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.201967,13387 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.137839,13387 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,18.9907,190974 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.179532,13398.7 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.112198,13398.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,109.752,1.07808e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.216092,13300.5 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.143657,13292.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.9502,268056 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.18933,13323.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.118691,13321.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,110.201,1.07843e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.217835,13226.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.139231,13218.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,26.902,265461 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.194255,13264 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.120886,13262.1 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,110.725,1.0789e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.217611,13194.6 -BatchNorm18_f2h,0,0 
-BatchNorm18_h2f,0,0 -Relu18,0.138386,13194.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.928,265094 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.188508,13192.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.117836,13190.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,109.97,1.06774e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.220907,13127.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.141388,13127.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.9667,264174 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.188917,13177.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.118879,13175.3 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,110.078,1.06532e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.214929,13077.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.141698,13077.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,27.0638,264730 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.18982,13115.9 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.118863,13115.9 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,42.8797,419154 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.178261,13039.9 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.131683,13039.9 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.0624,130440 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.153228,13019 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0731229,13019 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,140.751,1.33412e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.204744,12845.3 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.138002,12828 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,25.6298,245932 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.171794,12824.5 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.09951,12807.3 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.232002,12799.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00246,19515.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.067734,12790 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.3304,135313 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_155.txt deleted file mode 100644 index 3cd198c77ec69951041ad8d1e114a5a5df5072c8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,71.1138,599921 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.237858,11925.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.156825,11929.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,187.397,1.62341e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.252718,12186.7 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.159817,12173.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,135.388,1.17338e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.242434,12581.7 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.154873,12576.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,98.0578,895295 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.241234,12892.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.158828,12869.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,70.626,663698 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.227765,13028 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.147266,13029.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,218.255,2.09237e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.250332,13410 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.156972,13411.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,88.0488,844329 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.233976,13637.7 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.154652,13630.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,55.1219,553498 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.229189,13733.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.147564,13735.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,36.8881,373914 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-BatchNorm9,0.205544,13750.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.1411,13758.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,128.082,1.29008e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.245151,13759.7 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.156329,13761.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,46.1832,445284 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.222834,13870.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.148393,13872.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,32.5254,333762 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.196988,13884.1 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.137001,13874.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,18.3932,190258 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.173331,13878 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.117833,13862.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,106.178,1.0807e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.217778,13743.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.145286,13728.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.2919,265060 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.18885,13060.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.124175,13741.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,106.833,1.07931e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.21707,13623.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.143596,13623.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,26.2662,265123 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.180648,13648 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.124159,13638.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,106.912,1.07212e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.216444,13541.2 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144072,13539.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.4088,264220 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.194187,13542.9 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.1283,13535.3 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,106.146,1.05831e+06 
-Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.211371,13462.3 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.146716,13454.7 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.315,261815 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.184258,13495 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.127935,13470.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,106.704,1.05782e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.221704,13406.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.142434,13391.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,26.3237,260152 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.188457,13414.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.131734,13406.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.3881,435180 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.170901,13338.3 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.127836,13315.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.6722,134732 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.158857,13315.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0751612,13288.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,141.205,1.3677e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.198412,13051.5 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.134025,13049.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.3303,255393 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.167004,13024.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0932922,13013.5 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.238219,13011.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00843,20004.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0678684,12990.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.4966,136714 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_156.txt deleted file mode 100644 index 
8019eb1b69a25d2c87a036fe7cac6864eaecedea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,71.2761,608731 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.244776,12084.3 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.158962,12088 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,188.912,1.64896e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.255058,12293.5 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.162853,12295.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,136.394,1.19324e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.238479,12658.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.158102,12652.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,104.663,967511 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.243407,12949.9 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.159599,12955.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,72.7473,673075 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.230959,13060.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.156332,13053.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,231.279,2.19977e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253381,13464.3 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.17527,13470 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,91.4477,882583 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.232638,13666 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.154396,13669.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,55.869,562471 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.223273,13816.7 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.15093,13790 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,37.16,376760 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.20005,13791.8 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.143135,13791.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,126.416,1.26317e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.242568,13828.7 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.15438,13828.7 -Relu10_f2h,0,0 
-Relu10_h2f,0,0 -Conv11,46.471,461018 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.221775,13910.5 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.148053,13902.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,46.0725,473517 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.203042,13897.1 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.143273,13889.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.7659,234615 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.185301,13891.7 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.133001,13884 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,150.933,1.52903e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.226543,13816.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.14726,13816.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,31.4032,317298 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.201442,13834.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.14229,13835.1 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,151.7,1.5283e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.236744,13765.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145177,13757.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.5003,318316 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.204392,13814.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.147897,13784.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,151.53,1.52452e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.230943,13756.2 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144997,13748.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.5601,316690 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.199858,13787.9 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.140754,13772.7 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,151.3,1.52384e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.235752,13709.1 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.143299,13701.7 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,31.5201,318085 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.201775,13786.5 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 
-Relu21,0.138499,13771.3 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,151.321,1.52065e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.230063,13711.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.145682,13711.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.5567,316267 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.204805,13747.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.14038,13732.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,42.7856,438524 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.176604,13635.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.131766,13620 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.6054,139742 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.150604,13615 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0857691,13588.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,140.048,1.38348e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.201449,13321.5 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.13701,13313.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.1387,260393 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.174642,13276.5 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0936986,13264.9 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.242418,13255.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.997105,20473.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0684222,13234.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7298,139782 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_157.txt deleted file mode 100644 index d8489f200d082fa1d51b5c5d12c1827951a4c4b3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,72.46,609354 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.240213,12032.8 
-BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.162009,12032.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,193.444,1.66746e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.254498,12239.9 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.157048,12241.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,137.521,1.22085e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.248389,12658.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.164284,12644.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,103.363,961789 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.245477,12934.7 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.160453,12928.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,72.2451,654769 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.234348,13047.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.165647,13047.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,229.582,2.2071e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.251861,13502.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160018,13504.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,91.5788,884955 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.237893,13725 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.15511,13728.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.2835,627100 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.230226,13832.7 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.155564,13830.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.4552,397671 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.202898,13832.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.146067,13832.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.42,1.38913e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.242076,13900.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.157247,13900.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.6253,504818 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.223986,13993.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.149266,13978.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.7706,467971 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.214168,13992.5 -BatchNorm12_f2h,0,0 
-BatchNorm12_h2f,0,0 -Relu12,0.146117,13984.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,21.7253,226389 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.193132,13951.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.13926,13936.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,150.085,1.52527e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.233119,13863.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.150428,13852.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,30.5291,308692 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.204037,13907.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.142162,13892.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,150.359,1.52327e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.231486,13824 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.146287,13808.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,30.7597,309798 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.206306,13867.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.148242,13859.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,150.606,1.52351e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.230056,13759.7 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.146265,13759.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,30.5411,306875 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.200361,13818.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.140364,13818.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,150.637,1.51677e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.230936,13747.9 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.144723,13732.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,30.7032,304003 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.205586,13784.3 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.144537,13784.7 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,150.371,1.51111e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.225404,13721.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.147328,13721.5 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,30.6697,303778 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-BatchNorm23,0.202386,13765 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.141798,13750 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,66.0462,671959 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.198402,13616.1 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.137314,13614.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.6425,169765 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.175077,13597.3 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.113548,13589.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.249,2.15435e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.21949,13160.3 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.142856,13152.7 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.0835,301232 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.190239,13165.6 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.127974,13150.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.280715,13140.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.999888,20340.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.075977,13120 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.6519,138176 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_158.txt deleted file mode 100644 index 90ffe2f12a60802fc29a7089ae60cd65af0bf6dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,67.3077,556049 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.238328,11681 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.163951,11682.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,184.143,1.55929e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.247538,11908.5 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.162972,11910.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,126.709,1.09389e+06 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.234011,12236.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.157218,12230.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,96.8968,872072 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.247083,12522.7 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.157017,12536 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,68.2742,621439 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.22669,12633.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.152831,12639.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,216.566,2.01758e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.245538,13050.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.158569,13052.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,85.8555,805974 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.232828,13260.7 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.159292,13268.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,55.0325,538310 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.226913,13382.1 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.149215,13374.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,36.76,363572 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.196174,13383.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.142963,13383.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,128.515,1.26005e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.239979,13383.1 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.148604,13383.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,45.7883,443150 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.216943,13459.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.145593,13471.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,33.0725,330537 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.197926,13499.7 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.137593,13492 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,18.5424,187872 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.175276,13494.1 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.114034,13482.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,108.862,1.07927e+06 -Conv14_f2h,0,0 
-Conv14_h2f,0,0 -BatchNorm14,0.212722,13372.3 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.139647,13362.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,26.5576,259574 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.187423,13395 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.124197,13387.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,109.336,1.07399e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.214965,13263.7 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.141286,13263.7 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,26.4503,261680 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.186751,13309.5 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.127776,13299.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,109.411,1.07093e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.214437,13206.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.140688,13206.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,26.5886,259645 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.188684,13219.9 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.128895,13219.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,108.548,1.0561e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.212373,13123.7 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.14294,13114.1 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,26.4395,256976 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.190444,13164.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.12574,13154.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,109.135,1.05882e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.211669,13070.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.142716,13070.5 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,26.4033,255198 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.1842,13097.1 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.125782,13087.5 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.8395,430786 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.174146,13004 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.157049,12994.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 
-Conv25,13.6335,134842 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.16207,12988.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0832412,12983.1 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,139.889,1.32864e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.22301,12835.3 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.138044,12804.7 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.3868,253192 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.169272,12810.7 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0911609,12795.3 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.236268,12778.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00093,19650.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.064918,12753.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.5151,134194 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_159.txt deleted file mode 100644 index f516e0ae4c0a2d7f01b2ed15147ad8d2d14a6eff..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,65.3362,550872 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.23892,11881.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.163554,11881.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,183.873,1.57523e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.249694,12067.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.15319,12063.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,126.067,1.09797e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.238168,12364.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.15277,12358.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,102.987,934710 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.251384,12637.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.156582,12641.2 -Relu4_f2h,0,0 
-Relu4_h2f,0,0 -Conv5,70.0595,632959 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.228197,12755.7 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.162607,12742.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,228.783,2.12873e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.248139,13146.3 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.1567,13140.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,88.9911,839060 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.236088,13360.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.15157,13353.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,55.374,543389 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.228302,13461.7 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.146223,13463.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,36.8056,364843 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.205036,13448.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.143772,13448.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,128.391,1.26181e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.241208,13441.1 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.148406,13441.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,46.0282,448468 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.213861,13495.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.156479,13495.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.5721,458472 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.204981,13505.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.140918,13506 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.7383,231087 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.1858,13490.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.133187,13490.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,149.135,1.47567e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.229406,13452.5 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.139452,13441.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,31.8132,312152 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.196991,13491.1 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.136844,13491.1 -Relu15_f2h,0,0 
-Relu15_h2f,0,0 -Conv16,149.39,1.47069e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.233442,13433.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.142633,13433.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.8094,312081 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.201141,13486.9 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.136691,13464 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,149.274,1.46842e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.226869,13431.5 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.14517,13416.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.806,311953 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.202188,13467.8 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.136435,13465.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,149.051,1.46773e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.225705,13406.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.144131,13399.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,31.8468,312188 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.206552,13448.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.142217,13448.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,149.405,1.46552e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.228936,13410.7 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.144601,13408.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.9564,313028 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.200817,13441.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.142169,13433.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.1695,434683 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.139756,13314.3 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.132351,13308.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.6361,140148 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.146761,13308.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0831129,13293.5 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,141.342,1.36962e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.203528,13087.8 -BatchNorm26_f2h,0,0 
-BatchNorm26_h2f,0,0 -Relu26,0.135596,13072.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.1524,257254 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.163311,13041.5 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0935357,13035.5 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.234783,13033.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00197,20208.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0691773,13017.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.6763,136774 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_160.txt deleted file mode 100644 index c5475d1abb9ac9341738791863861927e48b615e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,67.2762,561170 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.244971,11778 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.159462,11776 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,186.843,1.58778e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.25467,12003.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.155311,11995.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,126.062,1.07634e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.236984,12320.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.155788,12322.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,100.552,898784 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.245582,12602.9 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.155635,12604.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,67.8345,619418 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.223733,12736.7 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.152607,12723.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,225.203,2.10712e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.254111,13166.3 -BatchNorm6_f2h,0,0 
-BatchNorm6_h2f,0,0 -Relu6,0.156988,13166.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,86.4242,812090 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.234776,13374.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.152287,13366.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,61.3563,604222 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.230159,13505.7 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.15541,12809.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.2153,379525 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.205781,13465.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.14428,13459.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,136.112,1.34823e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.248104,13534.9 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.157125,13534.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.3802,490971 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.222866,13590.5 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.15254,13583.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.5792,442030 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.207859,13594.9 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.144908,13572 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,21.8041,220761 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.183919,13585.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.137727,13559.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,147.762,1.46669e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.234757,13515.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.143222,13500.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,30.8424,301925 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.200748,13561.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.138672,13529.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,148.326,1.46578e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.229147,13479.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145971,13471.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,30.8856,302761 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.166258,13498.3 
-BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.142009,13490.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,148.208,1.46201e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.225336,13465.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144601,13450.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.0143,303698 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.166949,13500 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.136172,13490.5 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,148.247,1.46123e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.226216,13410.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.143381,13408.7 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,30.8179,300303 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.199547,13456.3 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.138137,13441.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,148.226,1.45887e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.229896,13427.7 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.147097,13420.1 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,30.869,300279 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.166249,13443.1 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.141752,13431.7 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.8585,656518 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.198364,13322.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135327,13314.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.807,170841 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.146767,13308.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.105954,13306.5 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.517,2.11754e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.220639,12969.1 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.138908,12951.7 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.8758,302433 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.154178,12940.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.129292,12931.1 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.274024,12931.1 -Pool1_f2h,0,0 
-Pool1_h2f,0,0 -Mul1,0.982346,19414.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0827833,12923.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7055,136431 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_161.txt deleted file mode 100644 index ec4bf136fd09893c177a6bf1002292e43a45cb36..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,73.8414,631151 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.242539,12072.7 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.16669,12064.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,204.014,1.77199e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.255566,12266.7 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.155884,12259 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,141.638,1.24598e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.244312,12597.7 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.154118,12605.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,109.314,1.01021e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.244891,12856.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.154319,12843.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,74.6864,695116 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.229676,12984.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.15126,12980.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,241.463,2.28995e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253054,13320 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.153273,13322 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,95.1088,896183 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.239807,13546.3 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.156552,13534.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.5937,621998 -Conv8_f2h,0,0 -Conv8_h2f,0,0 
-BatchNorm8,0.228834,13629.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.149055,13623.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.9155,389179 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.21284,13627.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.148124,13629.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.782,1.37559e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.365959,14374.3 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.155062,13697.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,51.2204,504730 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.228258,13752.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.147311,13767.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,44.3395,442975 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.211893,13765.7 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.13869,13765.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.061,224984 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.182456,13764 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.131964,13756.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,146.744,1.474e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.266543,14269.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.139257,13677.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,30.8873,310956 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.198693,13723.5 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.137286,13715.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,146.728,1.46661e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.228526,13677.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.140067,13677.1 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.1362,304718 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.203583,13720.3 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.137717,13720.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,146.902,1.46867e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.229548,13669.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.139993,13662.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,30.9505,308310 
-Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.201471,13709 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.138271,13701.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,146.573,1.46703e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.227573,13643 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.142895,13635.3 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,30.9968,309732 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.201286,13709.4 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.134083,13701.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,146.49,1.46573e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.246264,13661.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.145276,13638.9 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.0376,311391 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.199903,13678.7 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.135753,13671.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.7408,447640 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.171039,13570.5 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.126866,13555.3 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.8324,147999 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.149698,13526.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0732701,13526.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,141.664,1.3947e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.198121,13262.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.133129,13260.7 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.505,266031 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.172438,13224.3 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0853532,13218.3 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.235253,13206.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.994403,20461.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0672859,13187.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.8631,139127 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_162.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_162.txt deleted file mode 100644 index 02527d21f3d48f8563ff990061c3c0e2e95f794a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,73.8135,630460 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.239781,12074.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.16229,12076.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,205.848,1.78853e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.25524,12216.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.164927,12211 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,142.182,1.24893e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.242482,12585.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.159045,12599.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,110.367,1.01537e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.248456,12852.7 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.162662,12852.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,74.9507,700660 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.232584,12970.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.155391,12974.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,242.41,2.29846e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.259675,13325.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160447,13318.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,95.6071,907276 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.271394,14051.3 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.162002,13534.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.8554,626911 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.230712,13619.3 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.153519,13621.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,40.1023,398753 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.203317,13623 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.149542,13617.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,138.461,1.38418e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.247029,13726 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.153794,13714.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,51.1921,505798 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.225032,13815.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.155001,13811.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,46.7371,478281 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.211394,13801.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.143395,13786.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.6998,233105 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.182991,13784.7 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.129795,13784.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,151.464,1.52713e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.228754,13738.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.150195,13738.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,31.5123,318125 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.196885,13751.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.141497,13751.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,152.001,1.52534e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.232968,13697.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145913,13674.9 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.51,317531 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.199567,13742.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.140191,13735.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,151.76,1.52232e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.237976,13698.3 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144409,13698.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.5362,318893 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.204191,13709.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.143964,13709.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,151.746,1.52187e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.228536,13694.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 
-Relu20,0.142022,13685.3 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,31.4799,317124 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.200994,13713.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.138153,13705.9 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,151.636,1.51939e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.229628,13664.1 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.144275,13633.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.6331,318003 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.200764,13721.3 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.139787,13713.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,42.7572,439772 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.174015,13605.1 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.134892,13605.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.5968,140312 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.155187,13567.1 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0838394,13561.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,140.21,1.38343e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.202767,13279.5 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.137423,13271.9 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.1867,260375 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.16566,13222.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0968313,13220.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.23684,13213.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.997553,20469.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0675742,13197.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7524,138858 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_163.txt deleted file mode 100644 index bfaefe1f93f0c06c121c2d40d7a95c0bec41c7d4..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,73.5306,618720 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.23948,11995.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.160278,12002.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,204.09,1.76839e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.258773,12169.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.156037,12169.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,141.132,1.24245e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.245346,12557.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.154549,12553.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,109.832,1.00734e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.248127,12826.1 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.156757,12826.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,74.6003,692660 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.228661,12963.1 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.15245,12965 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,242.491,2.28196e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253103,13331.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.155417,13325.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,95.0336,907107 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.237554,13533.5 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.155826,13535.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.5935,621315 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.228398,13659.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.146105,13659.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.6135,397543 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.20564,13661.1 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.146252,13663.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.839,1.38205e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.239839,13740.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.149785,13732.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.9582,493150 -Conv11_f2h,0,0 -Conv11_h2f,0,0 
-BatchNorm11,0.223737,13832.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.146992,13832.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,46.001,471056 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.205928,13121.2 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.137394,13113.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,21.6703,223766 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.184319,13805.5 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.130399,13790.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,150.402,1.51399e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.227282,13742 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.141,13702 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,30.6438,306393 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.199957,13793.3 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.138226,13785.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,150.584,1.51175e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.231679,13712.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.143673,13710.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,30.5536,305402 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.198997,13731.1 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.138623,13719.7 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,150.714,1.51273e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.229429,13673 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.141375,13671.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,30.5783,305575 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.20109,13721.9 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.14039,13712.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,150.393,1.50487e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.230805,13674 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.139986,13658.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,30.5596,304592 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.197362,13712.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.135807,13713 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,150.74,1.49737e+06 
-Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.232216,13677.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.14262,13662.4 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,30.5872,303610 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.199298,13698.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.141807,13696.5 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.9365,667846 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.198719,13575.9 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.132825,13575.9 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.6592,172608 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.174741,13551.2 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.102927,13549.3 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.694,2.14657e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.227048,13133.9 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.141084,13133.9 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.8577,309815 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.189375,13121.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.124553,13120 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.271298,13108.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.01795,20355.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0693854,13087.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.795,138076 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_164.txt deleted file mode 100644 index 82957f7e574b11d69bfd9f4026c65dc9dade14e1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,73.3237,616980 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.252975,11904.7 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.156789,11904.7 -Relu1_f2h,0,0 
-Relu1_h2f,0,0 -Conv2,203.71,1.75182e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.257183,12094.3 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.160783,12099.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,141.917,1.24503e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.238699,12459.5 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.158214,12451.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,109.804,1.00448e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.243948,12738.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.15534,12740.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,74.5378,683595 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.226549,12869.7 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.160021,12869.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,240.355,2.24986e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253925,13206.9 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.158899,13210.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,94.6978,899986 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.241532,13462.9 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.162335,13455.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.8486,617053 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.239682,13570.7 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.159285,13567.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.7212,396716 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.20213,13559.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.150242,13566.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.215,1.36729e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.252351,13621.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.155605,13614 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,51.2211,499446 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.229266,13702.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.149071,13704.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.0727,457483 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.212652,13685.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.139596,13676 -Relu12_f2h,0,0 -Relu12_h2f,0,0 
-Conv13,19.1047,196344 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.184997,13651.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.131113,13652.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,144.047,1.44141e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.22854,13589.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.154277,13582.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,27.3837,274863 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.17157,13646.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.139769,13639 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,144.48,1.44028e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.229426,13571.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.142994,13563.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,27.2863,271498 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.209167,13595.7 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.1415,13571 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,145.157,1.44271e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.232255,13521 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.143574,13503.9 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,27.4124,273426 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.169881,13565.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.144831,13565.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,144.194,1.43332e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.226523,13519.1 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.144755,13496.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,27.5437,272377 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.171343,13528.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.139577,13521 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,144.397,1.43233e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.238267,13517.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.145174,13502.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,27.3829,272840 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.168835,13523.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 
-Relu23,0.140742,13521.3 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.9261,660511 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.197484,13423.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135283,13415.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.738,173898 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.147356,13393.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0986681,13382.1 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.58,2.12602e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.217656,12998 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.140076,12988.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.9016,306467 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.15629,12985.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.129427,12985.9 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.283336,12970.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.980369,19534.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0798522,12962.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7891,136997 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_165.txt deleted file mode 100644 index cd027ffd1de79aaa5146212bd23f0020c997acc9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,69.3191,574626 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.236443,11743.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.161801,11743.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,201.037,1.70432e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.252194,11916.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.166258,11918.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,132.925,1.12746e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.240975,12253.2 -BatchNorm3_f2h,0,0 
-BatchNorm3_h2f,0,0 -Relu3,0.1583,12262.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,108.741,976297 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.24661,12497.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.159158,12501.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,72.0484,646729 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.234565,12614.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.160024,12616.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,238.309,2.20005e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.25172,12985.9 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.166444,12980.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,92.4312,855102 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.239563,13230.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.155676,13224.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,61.9394,600469 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.23356,13330.3 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.150198,13332.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,38.9737,379692 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.207391,13326.5 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.146089,13326.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,136.564,1.335e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.248737,13370.1 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.155695,13372.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.3563,484603 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.231282,13480.9 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.151647,13458 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.4751,452573 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.210892,13458.1 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.148863,13458.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.5207,223934 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.184828,13446.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.135679,13431 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,148.642,1.45985e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.230367,12695.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 
-Relu14,0.149679,12680 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,31.6327,306081 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.179061,13399.2 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.146617,13382 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,149.195,1.45513e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.237883,13288.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.144367,13288.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.1472,304086 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.208437,13340 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.140396,13332.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,149.03,1.45434e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.234428,13272.9 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.145771,13253.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.1364,302884 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.168044,13317.1 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.142767,13315.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,148.752,1.44963e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.228073,13249.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.145705,13203.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,31.1847,302886 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.211768,13309.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.138828,13293.9 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,148.805,1.44547e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.22502,13232.1 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.147206,13216.9 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.281,304041 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.170371,13285.9 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.141096,13278.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,44.15,437820 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.151567,13188.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.134178,13177 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.7904,143243 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.114892,13154.2 
-BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0781433,13152.3 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,140.601,1.35055e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.167247,12963.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.136684,12947.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,26.487,259063 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.137045,12926.3 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.0862073,12908.9 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.238485,12897.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.982116,19970 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0672891,12869.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7986,135293 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_166.txt deleted file mode 100644 index 0a1f2f2a19baf9ac339ff93c32462d20bec705ce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,68.7667,574325 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.240619,11857.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.16045,11859.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,199.437,1.692e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.254156,12003.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.164009,12003.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,132.105,1.14462e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.235595,12305.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.159211,12307 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.663,973206 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.244917,12572.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.167698,12574.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,71.6641,652760 -Conv5_f2h,0,0 -Conv5_h2f,0,0 
-BatchNorm5,0.227627,12679.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.159772,12696.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,238.694,2.20659e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.252536,13073.7 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.15734,13075.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,91.8926,858257 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.236524,13272.5 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.159874,13270.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.2652,605043 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.234482,13418.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.154556,13405.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.6306,388095 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.20694,13403.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.153762,13395.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.151,1.34841e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.244786,13454.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.152815,13448.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.6778,491265 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.222187,13531.5 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.154853,13525.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,46.1488,462071 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.209093,13536.1 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.147392,13536.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.9519,231737 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.184479,13515.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.137759,13515.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,149.264,1.47922e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.228498,13450.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.14774,13443.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,31.8358,313201 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.200178,13523.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.142124,13515.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,149.572,1.47623e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 
-BatchNorm16,0.229621,13439.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.152786,13427.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,31.8346,312797 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.200021,13473.7 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.144441,13466 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,149.62,1.47458e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.228017,13448.7 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.152805,13441.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,31.909,312018 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.201557,13454.5 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.140764,13454.5 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,149.771,1.47547e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.243183,13427.7 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.149849,13420.1 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,32.009,310425 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.200818,13469.7 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.142956,13462.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,149.492,1.47339e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.227759,13403 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.148809,13387.7 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,31.9162,312094 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.199403,13446.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.140626,13431.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,43.3567,434003 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.176009,13349 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135337,13345.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,13.6609,138922 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.119318,13324.3 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0870107,13316.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,141.172,1.37186e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.198732,13080.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.137462,13070.7 -Relu26_f2h,0,0 -Relu26_h2f,0,0 
-Conv27,26.3022,259044 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.172399,13049.1 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.100758,13039.5 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.238927,13014.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.01879,20127.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0675326,12985 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7069,137292 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_167.txt deleted file mode 100644 index 1b522abbf0d1936a04e32f08c37c4a49a59702b9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,68.6673,552231 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.241208,11726.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.155058,11720.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,200.192,1.70167e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.257295,11921.1 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.161327,11923 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,131.542,1.12377e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.239183,12239.9 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.162457,12232.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.076,962431 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.244555,12508.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.162063,12501.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,69.6903,626210 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.223259,12618.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.160703,12620.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,236.566,2.18903e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.248223,12991.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160949,12985.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 
-Conv7,89.7156,835641 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.240424,13234.7 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.157826,13240.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,62.0907,603424 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.227704,13335.9 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.151317,13337.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.5709,386957 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.206626,13336 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.151522,13336 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,137.07,1.34385e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.246549,13408.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.153139,13408.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,50.8769,491270 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.219637,13464.3 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.156767,13464.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.8861,459555 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.206898,13452.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.151388,13437.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,22.101,217288 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.184425,13429.7 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.136905,13429.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,148.381,1.45915e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.229173,13397.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.146972,13389.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,30.8738,301642 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.193752,13426.1 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.139452,13403 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,149.036,1.46068e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.235433,13366.7 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.144086,13366.7 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,30.858,297086 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.196265,13435.3 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.143116,13435.3 -Relu17_f2h,0,0 
-Relu17_h2f,0,0 -Conv18,148.774,1.45894e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.22797,13347.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.146252,13347.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,30.786,299719 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.207135,13408.7 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.141891,13378.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,148.546,1.45724e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.226053,13353.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.145618,13353.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,30.9501,300799 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.196591,13372.5 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.140838,13362.9 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,148.89,1.45648e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.233234,13362.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.145974,13354.9 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,30.9092,300797 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.193576,13380 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.140057,13372.3 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.843,653244 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.192511,13268.3 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.134649,13268.3 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.6744,171486 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.188306,13254.9 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0999609,13253 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.692,2.10795e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.221845,12889.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.163077,12882.1 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.7774,300414 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.183574,12255.7 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.125654,12240.3 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.277272,12236.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.01792,19313.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0706011,12192.2 
-Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,14.0469,136948 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_168.txt deleted file mode 100644 index feaab7025af2fa86071c5a370adbdaa04b0154c4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,68.0346,567023 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.242926,11653.9 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.157193,11633 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,198.122,1.67044e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.250604,11865.9 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.15671,11867.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,130.013,1.11358e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.239193,12203.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.155884,12205 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.084,959899 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.244674,12488 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.15702,12474.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,69.8678,621551 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.225858,12572.3 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.156917,12572.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,236.068,2.17194e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.253093,12960.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.154319,12953.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,89.75,832064 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.237247,13158.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.154725,13150.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,61.5093,591695 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.236501,13313.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.15198,13317 
-Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,37.9497,367549 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.205176,13299.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.143887,13294 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,135.42,1.31761e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.248952,13345.5 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.153813,13345.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,49.1257,471004 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.224338,13416.1 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.144508,13408.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,44.7035,441269 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.20708,13404.7 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.144809,13387.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,19.0943,190322 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.183471,13400.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.129663,13385.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,146.109,1.42987e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.232396,13298.1 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.14292,13288.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,27.4608,269446 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.202956,13334.3 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.139001,13334.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,146.52,1.4281e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.229522,13257.5 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.137132,13249.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,27.5975,268547 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.204408,13293.9 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.142655,13286.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,146.527,1.42242e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.231272,13205.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.14526,13190.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,27.611,258973 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.203848,13226.3 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 
-Relu19,0.142873,13226.3 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,146.34,1.41504e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.233778,13184.5 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.144911,13161.7 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,27.5379,263076 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.200987,13235.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.13606,13235.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,146.213,1.41418e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.229749,13141.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.143324,13141.5 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,27.4285,266700 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.203,13192.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.135372,13177.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,65.9098,644504 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.19382,13116.1 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.132486,13104.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.6885,169061 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.170767,13104.2 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0958996,13098.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,221.829,2.08623e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.223932,12736.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.139416,12736.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.7754,300848 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.186473,12762.3 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.121526,12762.3 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.272642,12743.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.01082,19715.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0705436,12721.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7132,133774 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_261.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_261.txt deleted file mode 100644 index 5764c923a5d3282cdcb947fdb7362031fdbc155a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,34.7468,335268 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.235557,13200.9 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.159352,13200.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,188.164,1.78043e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.241419,13365.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.158783,13354.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,61.9979,590333 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.213993,13439.7 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.151324,13424.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,95.3444,954185 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.235346,13743.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.156463,13737.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,32.7293,318875 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.200117,13729.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.145378,13724 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,197.554,1.98905e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.240514,13917.1 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160147,13921 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,52.1635,514375 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.215471,14052.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.17894,14056 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,59.7311,622031 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.207858,14206.9 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.146476,14208.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,20.8594,219771 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.179158,14168.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.101334,14168.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,131.122,1.36675e+06 
-Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.233304,14193.9 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.150223,14186.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,32.7903,336046 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.212814,14293 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.142086,14293 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.1962,432834 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.199724,14300.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.137625,14283.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,25.6303,273951 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.165756,14352.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0852767,14337.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,95.0072,1.01095e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.220683,14426.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.139564,14426.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,49.6761,531193 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.193132,14548.9 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.103925,14548.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,95.1137,1.02728e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.217877,14622.9 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.142115,14615.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,49.5112,539310 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.181189,14729.1 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.0981597,14713.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,95.2238,1.03906e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.220635,14755.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.139324,14750 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,49.51,542456 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.183189,14866.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.10278,14866.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,95.1583,1.04659e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.217365,14870.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.142703,14872.1 -Relu20_f2h,0,0 
-Relu20_h2f,0,0 -Conv21,49.5449,546845 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.146153,14966 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.102185,14950.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,95.1242,1.05274e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.215711,14988.9 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.141375,14981.3 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,49.6454,551692 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.189759,15059.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.0977175,15059.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,30.3868,347903 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.180328,15052.1 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.13094,15044.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,15.1535,174602 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.168495,15004.3 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0925081,14996.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,103.346,1.14545e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.20974,14898.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.13838,14883.3 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,29.2595,326863 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.184962,14793.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.10583,14784.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.249224,14761.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00365,22521 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0667038,14748.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.3432,156685 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_262.txt deleted file mode 100644 index 9d6d32c0c29b32303dc7183f7bda8f31d6325deb..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,35.2933,336232 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.215829,13182 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.16686,13174.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,187.044,1.77672e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.238293,13350.7 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.161567,13354.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,61.4642,584495 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.216792,13406 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.152444,13400.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,95.044,944613 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.229416,13652.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.164559,13652.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,32.8006,315961 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.199932,13605 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.151244,13608.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,197.839,1.97877e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.241247,13803.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.168761,13790.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,52.2454,516670 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.216604,13924.7 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.157567,13924.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,60.1604,617242 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.205295,14096.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.154457,14090.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,21.2047,220074 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.18558,14069.5 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.111026,14077.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,131.422,1.35975e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.233743,14117.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.159439,14109.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,33.0462,335225 -Conv11_f2h,0,0 -Conv11_h2f,0,0 
-BatchNorm11,0.211237,14205.5 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.152405,14205.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.3616,430000 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.200959,14211.3 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.142543,14211.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,25.7155,273098 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.168764,14264.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0937949,14234.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,95.5801,1.00776e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.219084,14354.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.152822,14331.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,49.4638,526699 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.186341,14473.5 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.105312,14450.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,95.3545,1.01838e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.222056,14553.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.147052,14538.3 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,49.4709,534382 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.154511,14660.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.100972,14645.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,95.2174,1.03527e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.215919,14685.3 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144258,14670.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,49.5222,541826 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.151487,14773 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.102262,14771.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,95.4371,1.04533e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.221906,14796.1 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.147052,14788.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,49.4384,545041 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.152898,14903.1 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.103965,14885.9 -Relu21_f2h,0,0 -Relu21_h2f,0,0 
-Conv22,95.3308,1.05056e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.222447,14920.3 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.150613,14907 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,49.392,548923 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.147727,15015.7 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.103875,15008.1 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,30.3057,345984 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.186344,15013.9 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.132668,14998.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,15.2037,175408 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.136431,14943.3 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.0966875,14933.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,103.299,1.14147e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.212952,14843.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.142169,14835.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,29.2995,324166 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.151148,14721.6 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.116937,14714 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.254367,14714 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.98345,22492.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0701629,14696.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.3137,154797 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_263.txt deleted file mode 100644 index 5bd59d30a06f2727acb186f27e93fe96da2ddf1b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,43.9947,429024 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.238895,13535.3 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.173062,13533.7 
-Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,271.997,2.63297e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.238738,13720.9 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.161378,13719 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,66.9047,646028 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.218233,13730.5 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.15608,13722.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,136.932,1.38122e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.229388,13962.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.160652,13966.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,35.9886,354647 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.209474,13974.1 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.151762,13960.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,287.028,2.93179e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.23454,14219.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.160588,14208 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,57.1948,573312 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.22621,14302.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.16343,14306.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,85.8868,899638 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.219551,14474 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.15821,14460.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.214,242716 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.188582,14437.7 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.121401,14447.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,180.243,1.91073e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.238383,14465.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.159004,14458.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,37.0562,386566 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.214591,14549.9 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.147427,14551.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.1684,523008 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.200982,14576.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.13942,14576.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 
-Conv13,31.728,346355 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.168511,14658.7 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0972408,14658.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,116.281,1.26585e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.224591,14808.9 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.143794,14801.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,59.4537,654842 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.190713,14971.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.112208,14971.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,116.333,1.28943e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.219241,15069.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145331,15046.1 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,59.4544,663491 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.157176,15229.1 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.116576,15221.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,116.33,1.3099e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.220723,15291.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.140172,15293.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,59.5804,676158 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.163391,15422.3 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.115487,15415 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,116.449,1.32912e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.227142,15469.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.143212,15454.1 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,59.6298,678938 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.166425,15587.1 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.118591,15579.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,116.215,1.33947e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.221916,15608.1 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.143376,15610 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,59.5738,689721 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.159478,15717.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 
-Relu23,0.115145,15713.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,39.2803,462516 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.157212,15690.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135881,15683.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.2666,191962 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.14735,15607.3 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.109309,15605.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,149.547,1.70744e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.215286,15291.7 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.139692,15276.5 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.2073,352971 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.154032,15169.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.131046,15163.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.258079,15152.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.977345,22692.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0791673,15126.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.5271,159046 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_264.txt deleted file mode 100644 index 6ae91b073a0abf842fa52c2091f88f9416c66b21..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,43.0578,421884 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.214812,13509 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.156427,13501.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,272.508,2.63415e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.241632,13681.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.166139,13687.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,66.4178,642908 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.218409,13740.4 -BatchNorm3_f2h,0,0 
-BatchNorm3_h2f,0,0 -Relu3,0.155096,13734.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,136.769,1.38371e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.23343,13985.5 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.158338,13989.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,35.762,358862 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.203519,13957.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.151323,13957.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,288.131,2.93659e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.238605,14236.5 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.159807,14236.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,57.6279,581751 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.221084,14321.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.15844,14323.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,86.2336,907284 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.221942,14489.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.15181,14493.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.4802,248247 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.187858,14471 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.124622,14465.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,180.724,1.91635e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.23222,14490.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.155435,14492.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,37.3881,388915 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.209916,14576.9 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.151451,14571.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.6641,527750 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.206597,14586.5 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.144513,14582.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,31.694,345698 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.167983,14692.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0933739,14668 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,116.311,1.26531e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.22319,14795.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 
-Relu14,0.144773,14778.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,59.5977,652013 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.195503,14971.3 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.120049,14956.1 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,116.306,1.28727e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.221833,15049.8 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.142513,15053.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,59.7765,667706 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.166709,15196.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.123617,15181.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,116.405,1.31163e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.228537,15306.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.141649,15284 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,59.6347,674600 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.168309,15419.3 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.117682,15412.5 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,116.76,1.33236e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.222748,15445.9 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.150523,15440.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,59.5797,682775 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.192722,15566.1 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.115624,15559.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,116.612,1.34262e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.225119,15590.3 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.144351,15575.1 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,59.569,690163 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.195113,15690.7 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.110791,15689 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,39.4258,463999 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.189731,15675.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.130619,15660.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.2577,193112 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.177509,15590.2 
-BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.108363,15582.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,149.611,1.70258e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.220252,15255.5 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.136232,15247.9 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,31.1682,353272 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.193814,15148.3 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.130507,15142.7 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.265622,15131 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00494,23412.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0722314,15092.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.6637,160822 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_265.txt deleted file mode 100644 index c74cb8bdba74b5c918d855de74d436dbb2277f5b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,43.0297,421590 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.216623,13512.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.161039,13504.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,271.259,2.65759e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.250704,13709.9 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.158408,13706.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,66.9953,631417 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.220338,13734.5 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.15861,13721.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,136.047,1.37755e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.233353,14008.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.161205,14010.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,35.6025,356137 -Conv5_f2h,0,0 -Conv5_h2f,0,0 
-BatchNorm5,0.210802,13974.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.15131,13266.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,287.591,2.92254e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.239705,14221.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.16699,14211.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,57.183,573760 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.221913,14350.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.160927,14331.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,85.8869,901906 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.217967,14530.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.150658,14522.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.1587,246421 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.191759,14495.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.118612,14497.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,180.139,1.91246e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.241638,14523.1 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.159205,14515.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,37.1515,386630 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.21773,14594.3 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.151064,14600.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.151,521378 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.204197,14636.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.139547,14621.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,31.8806,346492 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.178757,14691 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.096356,14691 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,116.125,1.26545e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.24807,14816.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.153675,14808.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,59.5723,655922 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.159397,15021.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.115953,15009.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,116.411,1.29243e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 
-BatchNorm16,0.226863,15101.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.144568,15093.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,59.5557,666248 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.162203,15236.7 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.117784,15229.1 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,116.319,1.30925e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.222786,15306.9 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.144926,15286.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,59.6213,676288 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.156776,15417.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.115205,15416 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,116.178,1.32115e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.230575,15465.3 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.141096,15457.9 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,59.6443,682331 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.195749,15587.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.11828,15579.5 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,116.42,1.33942e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.224425,15607.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.147016,15586.5 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,59.684,689434 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.190479,15713.6 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.115559,15694.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,39.28,461424 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.193173,15673.5 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.13644,14878.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.4487,189484 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.17452,15603.2 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.113435,15603.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,149.442,1.70307e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.22726,15270.9 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.144421,15263.3 -Relu26_f2h,0,0 -Relu26_h2f,0,0 
-Conv27,31.278,353649 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.18965,15169.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.131013,15160.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.267964,15140.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.00304,23360.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0676843,15117.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.452,158737 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_266.txt deleted file mode 100644 index 6c3baca1485041072a6b52c3b76f0cf7a79130b3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,39.7146,378068 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.218108,13238.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.159422,13236.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,239.56,2.26178e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.241266,13340 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.164277,13341.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,67.4237,627026 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.225317,13389.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.159643,13389.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,121.455,1.18506e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.236952,13623.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.157358,13614.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,36.5663,356874 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.211567,13595.9 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.160708,13597.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,258.364,2.54603e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.240079,13820.3 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.161755,13812.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 
-Conv7,59.1213,582336 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.227391,13945.9 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.161138,13940.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,80.4979,818867 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.226783,14145.9 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.156463,14153.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,24.1107,247697 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.200082,14140.3 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.133041,14132.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,169.174,1.74982e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.241122,14159 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.157157,14145.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,39.0231,395088 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.212047,14271.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.152168,14261.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.6749,514228 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.209736,14279 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.145368,14279 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,33.6214,356908 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.171435,14365.5 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0998312,14350.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,118.443,1.26131e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.230053,14506.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.147041,14499.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,63.9063,683456 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.196482,14700.1 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.11795,14684.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,118.488,1.28619e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.229698,14816.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.145739,14820.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,63.8902,700698 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.162098,14979.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.134283,14972.2 -Relu17_f2h,0,0 
-Relu17_h2f,0,0 -Conv18,118.587,1.31137e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.22653,15071.9 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.142443,15041.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,63.9643,713251 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.164258,15223.1 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.124923,15223.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,118.392,1.33126e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.22989,15285.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.145662,15270 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,64.2011,724774 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.165378,15399 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.133384,15399.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,118.576,1.34731e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.225346,15421.9 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.144219,15422.3 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,64.1962,731146 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.174882,15539.3 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.131089,15524.1 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,46.0521,531288 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.198956,15437.5 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.139137,15438.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.8328,200874 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.151061,15390 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.103898,15373.1 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,167.132,1.86052e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.227692,14944.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.143841,14936.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,32.1129,354746 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.162549,14835.7 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.125841,14820.5 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.272499,14803.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.995802,22882.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0691849,14793.7 
-Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.8295,157123 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_267.txt deleted file mode 100644 index e9b0012ca0bcc9f50fc86cc2e7d51e48ae00e410..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,40.807,387831 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.220335,13226.9 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.16691,13219.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,241.286,2.28873e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.242402,13325 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.16435,13321.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,68.1855,633617 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.222309,13363.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.161061,13367 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,122.519,1.1961e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.245535,13597.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.164098,13592.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,36.4864,348271 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.209871,13575.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.15557,13552.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,258.982,2.55435e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.240303,13783.1 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.163179,13775.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,59.5061,579185 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.223708,13926.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.155755,13917.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,80.6501,819398 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.223701,14094.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.156642,14102.1 
-Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.992,246036 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.193826,14096.3 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.13169,14100.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,169.762,1.75316e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.240492,14117.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.156264,14119.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,39.0104,395340 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.212741,14260 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.148468,14252.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.3044,509708 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.202747,14254.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.13979,14254.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,33.4185,355708 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.174098,14332.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0953958,14332.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,118.633,1.25677e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.231973,14484 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.14668,14476.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,63.8179,687296 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.16187,14692.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.116843,14685 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,118.798,1.2919e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.227439,14812.7 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.146049,14782.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,63.8659,698343 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.159806,14989.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.126929,14989.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,118.613,1.31388e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.229301,15041.3 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.476235,18046.9 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,63.6028,710556 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.159915,15238.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 
-Relu19,0.116596,15223.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,118.725,1.33224e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.234994,15237.1 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.152865,15231.5 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,63.8327,720263 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.163807,15400.7 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.122698,15378.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,118.892,1.34722e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.227666,15421.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.146443,15406.3 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,63.9231,730130 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.161755,15534.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.120267,15534.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,45.9485,530574 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.19564,15451.1 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.135684,15428.5 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.7976,199227 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.14458,15389.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.104593,15372.7 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,166.909,1.86195e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.221455,14938.5 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.142772,14930.9 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,32.1133,354758 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.15667,14849.1 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.125627,14818.6 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.257753,14814.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.983078,22917.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0701449,14814.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.603,155417 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_268.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_268.txt deleted file mode 100644 index 805f15081a124b40c4b8269fe8ccd2fa1f5eab6a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,41.0609,389058 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.248735,13243.9 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.164907,13230.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,240.56,2.2744e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.239839,13317.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.163666,13321 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,68.2105,640397 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.220434,13378 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.160491,13379.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,122.194,1.19443e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.231138,13605.1 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.16555,13599.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,36.4448,348862 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.206357,13575.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.15819,13571.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,258.714,2.55796e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.234524,13822.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.162609,13809.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,59.2753,577422 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.21676,13918.9 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.159845,13920.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,80.5336,819867 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.222091,14096.1 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.159339,14090.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.9475,248301 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.194347,14098 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.122091,14105.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,169.449,1.75265e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.234729,14145.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.153848,14139.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,39.1202,396021 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.216245,14237.3 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.165019,14241.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.253,511164 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.201928,14256.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.142801,14233.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,33.4833,357950 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.138616,14338.5 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0883655,14329 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,118.579,1.26092e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.229301,14493.5 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.148731,14485.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,63.8307,685593 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.164107,14680.7 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.126456,14680.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,118.899,1.29456e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.224338,14784.1 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.148226,14776.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,63.7843,700211 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.162516,14978 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.119371,14964.7 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,118.824,1.31562e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.231487,15049.1 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.147883,15033.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,63.9343,712596 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.165422,15192.9 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.118538,15192.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,118.816,1.33376e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.230863,15231 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 
-Relu20,0.148936,15223.3 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,64.0385,721620 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.15962,15370.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.121418,15355.7 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,118.945,1.35219e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.224491,14615.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.141688,15376.1 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,64.0118,729402 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.158462,15527.1 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.118494,15512.3 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,46.0323,531934 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.195115,15447.7 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.134382,15440.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.8233,196867 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.149906,15365.1 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.10114,15340.9 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,166.984,1.85964e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.22461,14953.7 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.144459,14946.1 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,32.2532,357170 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.165377,14841.6 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.121608,14841.6 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.266719,14826.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.987314,22923 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0666438,14818.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.6465,155554 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_269.txt deleted file mode 100644 index f64ae15dfc2b9bbe95895df76765388b57c3bde4..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,41.0589,389526 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,0.239077,13229 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.165512,13213.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,241.763,2.27007e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.237893,13285.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.161691,13285.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,68.0729,640085 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.224187,13353.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.156254,13353.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,122.153,1.1931e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.229454,13597.7 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.165598,13592.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,36.4707,343294 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.21012,13567.9 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.157995,13546.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,258.589,2.55164e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.240277,13782.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.166158,13784.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,59.2977,569658 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.219281,13917.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.158853,13894.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,80.386,817142 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.216875,14098.1 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.157915,14090.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,23.9888,244454 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.203624,14098.1 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.138408,14090.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,169.163,1.75117e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.234482,14122.7 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.1621,14099.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,39.3339,400570 -Conv11_f2h,0,0 -Conv11_h2f,0,0 
-BatchNorm11,0.215282,14225.5 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.151787,14227.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,48.4712,512325 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.206414,14233.1 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.146151,14235 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,33.6261,357581 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.170184,14355.9 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.0956167,14354 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,118.948,1.24848e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.224434,14495.1 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.143509,14479.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,63.8797,686364 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.196584,14665.7 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.121214,14652.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,119.21,1.29817e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.223486,14791.7 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.148053,14778.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,64.0499,700906 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.16453,14947.1 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.129873,14924.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,118.957,1.31703e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.231995,15031.9 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.152062,14993.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,64.0246,712880 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.158401,15179.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.124209,15172 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,118.943,1.33614e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.225058,15206.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.15124,15201 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,63.9481,716213 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.194782,15367.5 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.126907,15360.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 
-Conv22,119.141,1.34272e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.22451,15388.5 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.147445,15380.9 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,63.9854,709064 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.195573,15498 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu23,0.116977,15498 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,46.0994,530583 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.190437,15438.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.136027,15419.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,16.8004,201115 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.178411,15357.9 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.100967,15331.3 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,167.193,1.86327e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.225605,14921.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.143633,14919.5 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,32.2525,357214 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.193957,14818.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.120766,14811.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.25774,14799.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.01096,22894.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0683877,14774.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,13.7291,155959 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp32_perf_fp32_120.txt deleted file mode 100644 index a32a995a8279896b28f6a55ddebb630edc916ba4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/mobilenet_cifar10/mobilenet_cifar10_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,255 +0,0 @@ -Conv1,96.9252,865993 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -BatchNorm1,1.44629,18053.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Relu1,0.468751,12163.4 
-Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,380.5,3.34989e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -BatchNorm2,0.320127,12396.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.158969,12386.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,224.11,1.88066e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -BatchNorm3,0.258617,12404.1 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.133142,12409.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,208.316,1.9126e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -BatchNorm4,0.245676,13072.1 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Relu4,0.141856,13068.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,81.0615,730270 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -BatchNorm5,0.244313,13234.1 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Relu5,0.189974,13243.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,593.95,5.13284e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -BatchNorm6,0.27095,12120.5 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu6,0.191753,12120.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,146.124,1.21007e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -BatchNorm7,0.263548,12267.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu7,0.192726,12279.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,123.276,1.05377e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -BatchNorm8,0.245183,12473.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Relu8,0.123152,12465.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,43.0723,393906 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -BatchNorm9,0.177062,12631.5 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu9,0.107667,12633.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,266.287,2.49353e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -BatchNorm10,0.242575,13217.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu10,0.132793,13223.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv11,67.4417,625092 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -BatchNorm11,0.239804,13424.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Relu11,0.175203,13418.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,75.8047,715723 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -BatchNorm12,0.241129,13595.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu12,0.151622,13585.8 -Relu12_f2h,0,0 
-Relu12_h2f,0,0 -Conv13,27.137,260044 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -BatchNorm13,0.217503,13681.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu13,0.14694,13672.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv14,199.051,2.00991e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -BatchNorm14,0.2499,14265.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Relu14,0.156185,14265.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv15,41.0208,407067 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -BatchNorm15,0.23191,14400.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Relu15,0.15607,14400.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv16,198.945,2.13558e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -BatchNorm16,0.253142,15090 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu16,0.160892,15065 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv17,40.8306,435387 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -BatchNorm17,0.232492,15161 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu17,0.153104,15151.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv18,205.78,2.29436e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -BatchNorm18,0.257356,15630.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Relu18,0.154368,15621.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv19,40.2938,441269 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -BatchNorm19,0.235654,15674.7 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu19,0.156019,15655.5 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv20,236.123,2.63379e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -BatchNorm20,0.260556,15668.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu20,0.155836,15660.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv21,40.2208,434870 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -BatchNorm21,0.231612,15753 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Relu21,0.159161,15730.1 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv22,234.944,2.64206e+06 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -BatchNorm22,0.249119,15735.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu22,0.160815,15717.9 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv23,39.8626,434106 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -BatchNorm23,0.239164,15796.7 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 
-Relu23,0.156585,15777.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv24,84.0255,958156 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -BatchNorm24,0.228284,15628.5 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Relu24,0.146156,15613.1 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv25,21.2845,239748 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -BatchNorm25,0.186892,15613.1 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu25,0.110233,15607.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv26,305.989,3.38538e+06 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -BatchNorm26,0.246278,15220.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu26,0.151472,15203.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv27,40.6724,447027 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -BatchNorm27,0.207692,15250.9 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Relu27,0.132445,15241.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Pool1,0.908599,15233.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,1.12393,23627.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add1,0.0794172,15208.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Softmax1,14.3291,173664 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_120.txt deleted file mode 100644 index e5768fe5ea99f84981077939f7b7946785db1f65..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,62.6208,504839 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.204305,11274.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.173355,11272.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,177.564,1.46427e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220104,11391.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.171121,11382.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,179.212,1.47709e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.220852,11464.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.148129,11452.9 -Add4_f2h,0,0 
-Add4_h2f,0,0 -Relu3,0.159691,11460.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,180.398,1.5005e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.220692,11556 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.175614,11554 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,179.578,1.50176e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.21883,11555.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.155486,11550 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.161313,11559.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,180.753,1.51802e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.219291,11666.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.172926,11647.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,180.126,1.51393e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.22139,11636 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.147553,11632.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.164721,11639.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,47.5749,393350 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.197073,11783.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.168151,11780 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,76.0434,647866 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.208026,11838.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.6613,166734 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.174935,11869.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.14193,11873.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.165761,11883.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,75.0139,654553 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.200519,11928.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.165636,11920.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,75.9454,655681 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.20459,11960.9 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.148145,11949.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.170452,11955.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,76.3321,664495 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.202321,12030.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.169706,12028.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,75.7521,659866 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.202228,12051.3 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146369,12051.1 -Add20_f2h,0,0 
-Add20_h2f,0,0 -Relu13,0.161882,12060.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,24.4298,210793 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.181179,12086.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.146884,12092.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,38.8564,344124 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.180357,12125.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,10.888,96816.9 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.155514,12134.9 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.119579,12113.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.122762,12125 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,37.5035,339187 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.169486,12146.3 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.141293,12138.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,38.8939,342824 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.187201,12153.3 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.126532,12149.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.148068,12160.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,39.1741,348960 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.183642,12157.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.148407,12149.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,39.0636,351543 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.182654,12185.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.127377,12181.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.145982,12198.7 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.528313,12200.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.30332,12202.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.12313,12172.3 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.3872,13996.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_151.txt deleted file mode 100644 index 786d8dbd8e2975e5dcf200279de9ad4f5ee9d9d2..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,42.1025,331178 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.203914,11070.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167274,11072.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,77.2909,624173 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.20379,11185 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.165642,11171.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,78.8872,641882 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21258,11297.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.156017,11288.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.156026,11303.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,79.988,655956 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.202046,11393.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.165655,11393.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,79.2363,654772 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.202391,11454.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.149879,11456.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157962,11460.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,79.9301,667196 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.203089,11592.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168244,11590.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,79.0317,663786 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.198548,11645.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.149294,11636 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.156958,11643.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,29.7754,250662 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.201719,11739 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.160871,11744.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.3569,342497 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.190558,11758.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,18.1525,156927 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.152638,11794.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.116695,11799.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.0913478,11803.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,38.6218,342251 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.175514,11823 -Add15_f2h,0,0 -Add15_h2f,0,0 
-Relu10,0.164484,11822.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,39.8432,345502 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.182158,11813.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.141978,11823.1 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.160209,11834.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,40.0361,346066 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.182388,11845.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.165822,11847.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,39.9824,345056 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.186641,11834.9 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146154,11842.5 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.164461,11848 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.6067,135479 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.15931,11868.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.138327,11864.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,20.4178,180705 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.154503,11878.3 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,10.4109,95442.7 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.138993,11876.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0791971,11886 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0786115,11887.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,20.4469,182135 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.154919,11876.5 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.138039,11880.3 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,21.0178,188571 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.163204,11871 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.129565,11878.7 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.143633,11882.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,20.9779,187402 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.160859,11890.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.142465,11875.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,21.0379,189157 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.164174,11869.3 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.122781,11874.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.151527,11874.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.46661,11886.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Mul1,0.24731,11886.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0726629,11878.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.23727,12451.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_152.txt deleted file mode 100644 index 600dc88cbedd2c88a6cb6cfa2f84bda1e54ebe7e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,41.7255,328354 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195306,11043.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172791,11030.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,76.8293,618767 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.202491,11167.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164016,11162 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,77.9585,629627 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.204298,11253.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.147258,11253.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.153575,11259.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,79.4552,645443 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.204382,11412.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.166052,11404.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,78.5592,650498 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.202612,11458.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.152455,11468.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157041,11471.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,79.5538,662985 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.202208,11578.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.170609,11576.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,78.6036,661670 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.201694,11639.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.147361,11639.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.15827,11645.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,29.5924,247685 -Conv8_f2h,0,0 -Conv8_h2f,0,0 
-Add11,0.183873,11734.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.161406,11744.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,39.4092,342559 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.18418,11740.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,18.2101,157182 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.154878,11763.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.119354,11763.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.0931747,11775 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,38.7238,337313 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.17578,11808 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.159248,11794.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,40.0541,345549 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.182059,11786.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.141815,11800.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.165885,11804 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,40.1396,346722 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.17819,11807.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.161015,11804 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,39.9928,347406 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.179018,11805.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.143879,11803.8 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.162426,11817.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.7216,137456 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.157719,11830.4 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.138039,11834.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,20.479,181242 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.153914,11838.3 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,10.4474,95123.7 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.136001,11851.7 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0753923,11838.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0775427,11849.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,20.3511,181402 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.150442,11850 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.135665,11851.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,20.9284,188101 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.155991,11851.9 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.12353,11855.7 
-Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.138263,11859.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,20.6996,183975 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.155792,11863.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.135159,11859.5 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,21.0454,186307 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.156814,11871.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.123629,11863.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.1473,11865.3 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.470191,11878.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.250315,11874.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0783232,11867.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.25459,12876 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_153.txt deleted file mode 100644 index a8c00d45766dd61abbccb3447c9843273e812b23..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,39.2902,302746 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.194282,10753 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.171389,10772 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,74.5737,591991 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208363,10884.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164769,10888.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,76.563,599496 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.206788,10997.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.14889,10991.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.158644,10995.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,77.5378,622350 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.204366,11152.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.182657,11158.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,76.5488,616585 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.203781,11171.6 -Add6_f2h,0,0 -Add6_h2f,0,0 
-Add7,0.150526,11171.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157367,11179.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,77.5713,634131 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.206548,11309.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.166986,11313.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,76.8136,623860 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.202513,11364.5 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.147665,11360.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.157863,11374.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,29.2647,238370 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.181006,11441.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.161915,11447.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,38.9844,330356 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.18122,11456.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,17.8395,151609 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.157044,11467.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.109652,11464.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.0951045,11489.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,38.4034,327927 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.174897,11525.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.167889,11519.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,39.8942,337672 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.182264,11498.3 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.143038,11494.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.165034,11509.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,39.8426,338902 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.179719,11530.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.161051,11536.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,39.807,336342 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.190631,11534.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.143041,11544 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.160606,11547.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.6155,133057 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.153764,11559.3 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.144916,11561.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,20.6401,179060 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.156493,11559.3 -Add22_f2h,0,0 -Add22_h2f,0,0 
-Conv17,10.4302,92865.8 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.140883,11551.7 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0839937,11544.1 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0774945,11557.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,20.341,178133 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.148689,11549.9 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.135156,11555.7 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,21.0171,182230 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.158154,11553.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125069,11553.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.141309,11559.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,20.8662,179452 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.150285,11555.7 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139575,11559.5 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,21.0884,182911 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.166781,11557.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.126864,11559.5 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.142695,11567.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.477797,11576.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.254052,11574.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0714568,11546 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.26098,12133.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_154.txt deleted file mode 100644 index 6480cd1c9857412d9edfc4452aa8c9ade5d16561..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,38.7211,300352 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196947,10836.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165751,10838.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,74.523,594803 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.202011,10941.9 -Add2_f2h,0,0 -Add2_h2f,0,0 
-Relu2,0.166049,10957.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,76.0639,606974 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.206692,11056.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.147703,11049 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.158186,11052.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,77.679,626789 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.200996,11183 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.171268,11186.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,76.4922,619582 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.20427,11230.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.150676,11228.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.154174,11236.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,76.9268,630270 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.201924,11351.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168199,11347.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,76.2617,627009 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.208318,11391.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.153293,11387.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.157697,11391.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,28.9703,236084 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.176138,10897.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.159335,10903.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,38.7976,324440 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.175629,11521.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,17.5909,149617 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.152413,11534.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.113466,11540.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.0900132,11554.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,38.0111,326607 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.171316,11555.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.162138,11557.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,39.5475,335374 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.184955,11547.9 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.152225,10964.3 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.156791,10972 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,39.399,334126 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.184081,11584.3 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.168109,11603.5 
-Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,39.2594,331374 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191911,11584.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.145463,11603.5 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.163089,11616.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.4498,130494 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.156122,11636.1 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.144304,11636.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,20.2917,176364 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.154055,11641.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,10.3201,93084.5 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.136158,11622.7 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0715077,11632.3 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0745285,11645.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,20.129,176475 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.148567,11626.7 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.138865,11632.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,20.7945,182563 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.155272,11615.2 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125051,11622.9 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.14018,11634.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,20.7104,179669 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.152506,11619.1 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139943,11621 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,20.8592,183779 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.15529,11626.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.124215,11618.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.139051,11618.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.455749,11628.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.236337,11630.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0688514,11618.9 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.27419,12648.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_155.txt deleted 
file mode 100644 index 516dfb5b2e62961dae843a98e0d497c345ee5f04..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,46.4951,363735 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196449,11194.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.166686,11194 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,95.1914,775570 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208955,11343.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169508,11355.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,96.8453,795969 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.214564,11434 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.158301,11439.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.162797,11449.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,98.0776,814828 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.208065,11620.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.165489,11616.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,97.0066,811310 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.202301,11643.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.144353,11645.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.158458,11647.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,97.9465,823840 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.204615,11737 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.172193,11742.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,97.1248,825100 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.207636,11784.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151512,11771.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.156084,11779.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.1464,299384 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.188359,11908.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.173533,11936.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,48.9689,428331 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.185953,11953.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.9562,173532 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.158212,11960.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.12786,11966.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.115277,11970.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv11,47.902,423740 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.185159,11995.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.161834,11993.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,49.5207,432998 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.184455,11988.5 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.143089,11984.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.165818,11991.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,49.5391,434498 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.183556,12037.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.171566,12043.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,49.448,433688 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.187249,12032.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.142932,12028.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.163277,12040 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,17.7985,157549 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.165853,12054.3 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.145255,12048.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,24.4838,219883 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.160673,12032.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.0361,99109.8 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.132864,12024.5 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0875301,12022.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.081645,12031.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,24.3331,219919 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.155952,12019 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.142647,12015 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,25.0519,225088 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.161057,12022.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.123367,12024.7 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.147978,12034.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,25.069,224657 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.158282,12017.1 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.140254,12028.5 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,25.0504,223780 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.162653,12019.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.127204,12024.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.139469,12026.8 
-Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.496319,12032.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.290376,12030.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.103578,12030.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.28758,12612.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_156.txt deleted file mode 100644 index 1f5bb7ca4479bd2929694021aa192e83bae5df94..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,46.3793,360478 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195889,11276.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.170801,11274.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,94.6525,782990 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.206669,11357.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164996,11364.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,96.5177,794718 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.214778,11485.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.15064,11489.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.15745,11491.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,97.7861,812718 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.207312,11624.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.168129,11622.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,96.797,812690 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.209047,11649.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.147086,11657 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.155418,11660.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,97.7592,828084 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.203898,11772.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168977,11782.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,96.6002,821463 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.202871,11824.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.148881,11828.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.155297,11828.3 
-Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,36.5045,309108 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.188538,11917.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.16497,11915.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,51.893,453452 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.193611,11943.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,20.3151,177282 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.164496,11978.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.128493,11976.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.109293,11983.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,50.5423,448585 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.184442,12021.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.162596,12031.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,52.2169,457988 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.189431,12033.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.141422,12035.5 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.161936,12048.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,52.6134,461346 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.189623,12077 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.165716,12065.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,52.2984,459593 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191367,12054.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146541,12064.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.501346,13860.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,17.9254,157814 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.162205,12078.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.140787,12082.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,24.0885,217926 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.15657,12086.5 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,10.9532,97077.8 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.133943,12069.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0983812,12080.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.081053,12084.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,23.9958,216954 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.153847,12098 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.140803,12094.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,24.8268,219949 -Conv19_f2h,0,0 -Conv19_h2f,0,0 
-Add26,0.162945,12080.9 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125815,12088.5 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.139783,12092.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,24.6496,219469 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.156685,12080.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139712,12077 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,24.6868,219414 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.159601,12081.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.123994,12080.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.141802,12080.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.482347,12082.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.288257,12080.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.10329,12080.9 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.32124,14976.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_157.txt deleted file mode 100644 index 17b884ece43f34f3a1adc72ca028d28e06277f69..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,47.1371,365002 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.200926,11232.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.168077,11238.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,97.9776,806133 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210599,11360.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.17601,11366.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,100.175,818140 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211361,11449.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.148887,11449.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155687,11456.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,101.313,837607 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.211018,11595.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.170058,11597.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,100.235,837138 
-Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.208558,11626.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.147578,11628.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.152945,11634.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,101.425,852459 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.217012,11771.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.173047,11765.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,100.636,845628 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.209892,11780.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.149863,11765.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.155021,11771 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,36.4794,307534 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.188935,11887.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.16512,11892.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,52.208,452912 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.195443,11894.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,20.4608,177813 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.167232,11907.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.133386,11921.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.118429,11932.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,50.7088,447510 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.185597,11964.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.158593,11966.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,52.4322,458226 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.192458,11970.3 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.144791,11964.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.161889,11985.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,52.4391,460381 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.193543,12006.7 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.169569,12018.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,52.3099,457756 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191437,12012.3 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.144557,12014.3 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.154932,12016.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.359,168131 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.170564,12050.5 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.14258,12050.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.2208,241963 
-Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.163844,12020.1 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.4742,103374 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.147642,12031.5 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0893282,12033.4 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0800389,12035.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.8358,242244 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.167018,12031.5 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.146839,12025.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,28.0046,246969 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.169105,12048.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125393,12037.3 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.143005,12039 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.8585,246907 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.16721,12047 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.140291,12023.9 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.9707,246224 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.17083,12027.7 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.123367,12029.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.150414,12044.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.500584,12061.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.323294,12046.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.120858,12043.1 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.38314,14412 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_158.txt deleted file mode 100644 index e98ad1dac3c713b97c23585b7ba1b320e4d4bc94..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,45.3649,351246 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198077,10842.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167095,10848.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,93.4682,742975 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212638,10985.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.16466,10989.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,95.4044,758756 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21259,11089.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.152186,11089.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.156686,11100.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,97.06,778990 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.207981,11202.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.169482,11213.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,95.9929,776887 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.207693,11238.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.145626,11234.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157671,11248.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,96.7122,786602 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.208877,11416.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.171114,11410.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,96.0349,778312 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.208868,11437 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151575,11444.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.15857,11450.5 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,34.5979,281611 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.189521,11522 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.168845,11529.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,48.8281,413618 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.185639,10966.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.5382,161047 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.162221,11590.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.127472,11595.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.112388,11599.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,47.7494,411233 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.179268,11640.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.164192,11644 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,49.3253,412576 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.195249,11630.5 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.143933,11640.1 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.158637,11653.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,49.4295,410691 -Conv13_f2h,0,0 -Conv13_h2f,0,0 
-Add18,0.189815,11071.5 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.157168,11077.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,49.9413,408263 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.189271,11052.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.143197,11063.7 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.162183,11069.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,18.1605,150442 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.16536,11685.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.142289,11678 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,24.9233,214438 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.16411,11672.1 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.1513,97298.5 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.164996,11071.3 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0814528,11065.5 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0784673,11065.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,24.6982,214926 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.167987,11666.3 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.143037,11660.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,25.4734,221295 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.173341,11668.2 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.137431,11670.1 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.149933,11673.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,25.3427,218774 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.16626,11658.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.148352,11670.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,25.3068,217734 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.16464,11670.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.129645,11660.5 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.146871,11671.9 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.512827,11679.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.299771,11679.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.110944,11664.3 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.33906,14567.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_159.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_159.txt deleted file mode 100644 index 84b449752cbe908ff0271b75f6a0bfe8f2cae235..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,44.4202,346297 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196084,10939.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172138,10943.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,92.4501,740633 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.205501,11047.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.165642,11050.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,94.4691,750930 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.204541,11179.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.149248,11183.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.161351,11187 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,95.6935,771756 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.515864,13069.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.167431,10736.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,95.9264,767258 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.202272,11335.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.157895,11339.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.158212,11343.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,95.6623,784604 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.204113,11469.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.17234,11468.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,94.4249,774456 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.205649,11513.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.148695,11509.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.154689,11519.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.5736,295192 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.186474,11631.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.166993,11629.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,51.0173,432711 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.185281,11671.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.5907,165968 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.159095,11677.4 
-Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.134301,11696.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.116272,11698 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,49.8657,433978 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.180228,11723.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.160599,11723.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,51.4392,439480 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.190029,11707.1 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.13889,11707.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.158903,11709.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,51.5985,443420 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.188548,11756.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.160647,11759.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,51.7089,441483 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191761,11762.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146625,11768.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.165668,11777.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,17.7843,154158 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.162772,11789.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.146276,11797.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,24.4869,215315 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.161156,11785.5 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.012,93090.3 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.141012,11764.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0942946,11768.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0804673,11779.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,24.2635,213766 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.155303,11783.5 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.138612,11775.9 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,25.1662,222125 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.163773,11760.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.12633,11764.3 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.140624,11781.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,25.0422,218318 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.160689,11771.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139709,11768.1 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,25.1088,220040 -Conv21_f2h,0,0 
-Conv21_h2f,0,0 -Add29,0.159495,11760.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.128381,11762.3 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.142407,11766.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.489137,11768 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.280977,11762.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0977411,11743.1 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.30064,13360.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_160.txt deleted file mode 100644 index ba5a39afb52f7a01ac475b342a5d6868d04fb006..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,44.8557,349989 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.201735,10935.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.161296,10937.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,95.4328,764135 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.209043,11020.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164733,11026 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,97.4355,765626 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210622,11144.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.152538,11133.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155821,11148.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,98.9292,797493 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.205998,11291.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.167424,11280.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,98.3052,780351 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.211626,11320.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.152692,11322.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.154954,11330.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,98.8454,809359 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.207902,11445.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.166164,11443.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,97.9064,789390 
-Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.215041,11473.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.154052,11477.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.153735,11477.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.7972,295329 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.192909,11571.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.166784,11570 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,51.1848,432395 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.186804,11598.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.5652,163840 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.163956,11619.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.133972,11623.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.112468,11644.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,50.2553,433771 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.187722,11669.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.164503,11679.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,51.7372,440714 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.19282,11665.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.144609,11673.3 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.165991,11677.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,51.8961,438430 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.194461,11682.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.168932,11694.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,51.7607,439705 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.189213,11693.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.152148,11699.7 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.16537,11709.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.5476,167143 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.169943,11721 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.148192,11721 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.3335,237228 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.163809,11695.9 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.638,104706 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.138474,11688.3 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0751105,11690.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0770753,11699.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.7729,236639 -Conv18_f2h,0,0 -Conv18_h2f,0,0 
-Add25,0.16017,11726.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.140992,11720.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.803,238869 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.17377,11720.3 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.128241,11720.5 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.149443,11720.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.7809,238580 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.169303,11731.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.142506,11728 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.8468,239035 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.166442,11716.5 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.125236,11727.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.140637,11731.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.507262,11737.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.287022,11733.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.118522,11714.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.35282,14060.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_161.txt deleted file mode 100644 index 84970f1fe409bfade732399b96dcca33f74287d5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,48.6326,382441 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.202526,11056.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.170983,11054.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,103.385,825432 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21745,11179 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164506,11184.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,105.014,844958 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.217178,11311 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.148206,11312.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.151754,11316.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.129,873656 
-Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.2093,11441 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.174301,11442.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,106.188,861404 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.208752,11481.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.156423,11486.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157261,11496.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,107.143,878038 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.207716,11599.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.169047,11605.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,106.132,879328 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.20818,11635.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151335,11641.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.155604,11647.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,38.1243,314346 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.190292,11735.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.168061,11737.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,55.4229,473356 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.195706,11744.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,21.316,182671 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.165684,11793.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.13834,11791.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.129319,11803.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.8285,470884 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.188733,11827.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.167789,11827.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,55.8302,479591 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.190257,11810.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.150512,11818.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.163104,11833.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.5428,478772 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.188622,11847.3 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.164317,11854.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,55.4859,475940 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.19601,11254 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.143582,11248.5 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.158666,11261.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.477,164523 -Conv15_f2h,0,0 
-Conv15_h2f,0,0 -Add21,0.173245,11868.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.142557,11881.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.3618,236204 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.167892,11286.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.5708,104259 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.140826,11866.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0799075,11866.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0791585,11870.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.0259,240364 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.163123,11881.9 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.139795,11881.7 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.9777,245697 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.167933,11880 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.127971,11883.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.14249,11889.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.9434,244459 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.163664,11870.3 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.141876,11881.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.0524,244842 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.168151,11870.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.122461,11880.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.140733,11889.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.508731,11895.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.29009,11895.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.120023,11887.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.34269,12462 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_162.txt deleted file mode 100644 index 2f1f3b1e5436717e32c0807e70f2d21972b753a4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,49.2141,390239 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.205447,11101 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172835,11087.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,103.701,844682 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21776,11211.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.170279,11209.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,106.573,862929 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216039,11318.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.151764,11316.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.1577,11318.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.64,882594 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.217053,11454.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.170574,11456.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,106.41,874664 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.217415,11485.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.158746,11488.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.168378,10907.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,107.563,890736 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.252208,11601.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.171156,11594.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,106.21,878230 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.214292,11645.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.160173,11640.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.15826,11642 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,38.645,322631 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.195962,11760 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.168909,11768 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,55.2585,475406 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.194413,11767.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,21.1175,183638 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.169248,11809.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.129863,11809.5 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.120483,11813.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.7737,472739 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.187588,11849.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.164023,11858.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,55.7519,480424 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.195216,11850 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.145911,11817.4 
-Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.166791,11842.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,56.1452,484693 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.194375,11876.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.172128,11891.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,55.7553,474578 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.202647,11863.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.149229,11874.8 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.168653,11878.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.7031,170962 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.174887,11891.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.144996,11891.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.6268,243927 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.171047,11914.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.6937,106426 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.14586,11893.7 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0834371,11893.7 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0801792,11893.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.1345,242994 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.168564,11888 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.145725,11895.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,28.2077,246745 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.17568,11876.7 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.12961,11876.7 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.145057,11891.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,28.2379,249840 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.169825,11897.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.147549,11897.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.207,239408 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.176106,11895.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.128208,11895.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.148234,11901.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.514369,11903.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.289118,11905.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.120292,11895.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.39271,13557.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_163.txt deleted file mode 100644 index cd96db7497bcf1ae4672492cd0c9b31424348439..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,48.917,387530 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.201447,11074.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.164775,11077.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,103.775,838117 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21514,11215.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.163821,11219.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,106.357,854075 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21345,11305.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.149136,11299.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155927,11315 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,107.042,875508 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.206573,11445 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.183095,11450.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,106.307,872832 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.21002,11475.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.152874,11481.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.155399,11492.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,107.039,887648 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.206352,11595.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.169418,11603.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,106.703,888928 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.211642,11640 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.158547,11643.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.15465,11651.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,38.4583,320287 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.194628,11740.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.170791,11746.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,55.2686,473661 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.194435,11782.8 
-Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,21.2744,182350 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.167949,11824.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.13272,11822.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.122724,11834.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,54.1731,472155 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.186141,11840.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.16338,11845.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,55.9995,480723 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.196103,11834.5 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.145139,11826.7 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.164205,11832.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.9716,481882 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.196506,11864.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.171933,11862.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,55.5609,476963 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.196423,11840.5 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.139981,11851.7 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.158179,11855.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.7503,171185 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.168394,11868.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.144119,11879.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.5084,242291 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.163031,11878.3 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.6804,106287 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.145466,11889.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0808514,11884 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0774689,11887.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.0126,241257 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.162461,11884 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.141376,11889.9 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,28.0699,244691 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.165706,11872.9 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.130356,11880.5 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.151485,11884.1 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,28.4861,249320 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.175658,11895.7 -Add28_f2h,0,0 
-Add28_h2f,0,0 -Relu18,0.140631,11880.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.2804,248130 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.166903,11886.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.131748,11880.5 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.147271,11880.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.510679,11880.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.290846,11880.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.123501,11865.1 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.35638,13059.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_164.txt deleted file mode 100644 index 7f3da6f1d981d3f5ad957abc0318ae1ece976cf9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,48.8895,384754 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198016,11106.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.174464,11106.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,102.8,833768 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223373,11230.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169821,11242.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,104.198,843830 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211972,11341.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.146963,11347.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.153901,11351.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,105.549,860690 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.213869,11489 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.170711,11492.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,104.681,855337 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.210925,11517.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.146103,11523.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.154807,11529 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,105.782,880310 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.210506,11636.1 
-Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.167152,11639.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,105.245,881034 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.210071,11663 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.152551,11674.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.163059,11678.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,38.0209,319075 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.193184,11774.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.165699,11774.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,54.6525,467069 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.193767,11789.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,20.9009,179117 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.167319,11839.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.141549,11848.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.135396,11850.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.2668,464535 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.187693,11847.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.163575,11861.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,54.9499,472197 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.195971,11851.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.154116,11851.5 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.165395,11858.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.38,476665 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.196314,11866.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.165891,11872.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,54.8451,470160 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191667,11857.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.143427,11863 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.162717,11876.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.4799,168624 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.170973,11899.1 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.147757,11895.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.4145,241547 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.167955,11889.7 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.5731,105172 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.146445,11891.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0832931,11912.4 -Add24_f2h,0,0 -Add24_h2f,0,0 
-Relu15,0.0781089,11914.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.8719,239354 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.16264,11916.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.143469,11912.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.7771,242174 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.16807,11914.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.126004,11922.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.13968,11924.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.8599,244873 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.174852,11924.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.145005,11928 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.8445,244668 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.167303,11897.9 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.124483,11892.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.14384,11903.5 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.509579,11926.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.290983,11913.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.121802,11894 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.35496,14169.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_165.txt deleted file mode 100644 index f535f820c943898d9f59a276eab1f036f793a172..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,46.7706,366332 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196864,10897.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165715,10884.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,101.034,805442 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214007,11024.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.166954,11027.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,103.116,814515 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209031,11118 -Add3_f2h,0,0 -Add3_h2f,0,0 
-Add4,0.155533,11123.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155629,11129.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,104.52,841564 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.20441,11250 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.167997,11257.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,103.536,830087 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.207524,11273 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.149543,11278.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.156848,11278.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,104.167,851167 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.207072,11441.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168394,11434 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,103.355,848740 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.206832,11443 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.157267,11452.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.154877,10875.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,37.4778,304343 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.189639,11571.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.164474,11564.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,54.1309,457152 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.193898,11579.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,20.3232,171197 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.162643,11587.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.136397,11598.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.127139,11604.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.0287,456900 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.185508,11644.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.165882,11646.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,54.6935,462937 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.191965,11628.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.147143,11636.7 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.164816,11660 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.0262,469640 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.189905,11670.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.165431,11676.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,55.213,469002 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.191364,11658.9 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.145229,11672.5 
-Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.162064,11680.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.2548,166311 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.166749,11698.1 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.142829,11698.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.3144,239688 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.16433,11689.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.4811,99846.8 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.142627,11691.9 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0968995,11688.1 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0832864,11697.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.8172,235710 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.15953,11691.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.142778,11683.9 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,28.0084,239588 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.167965,11680 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.126631,11681.9 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.143629,11681.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,28.4503,241887 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.168442,11685.7 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.150135,11695.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.0136,240097 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.16489,11695.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.129965,11689.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.146049,11693.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.512756,11702.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.28954,11704.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.123575,11680 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.34312,12874.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_166.txt deleted file mode 100644 index 4568dca5cc198e32be6a44e7ef13e0a7a0b089e7..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,46.5866,363696 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196045,10861.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167866,10869.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,101.518,809381 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.208125,10968.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.166212,10960.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,103.489,815567 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215581,10515.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.149162,10524.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155725,11108.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,104.95,841638 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.204362,11215.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.164708,11217.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,103.941,825927 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.207079,11248 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.148653,11248 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.159725,11253.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,105.078,854052 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.208765,11374.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168138,11380 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,103.767,849143 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.210032,11404.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.148023,11391.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.165571,11399.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,37.4473,304979 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.19322,11518.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.175434,11541.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,54.5451,451091 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.195261,11543.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,20.2246,169907 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.162151,11549 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.13777,11552.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.12792,11566.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.0257,453740 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.183991,11579.1 -Add15_f2h,0,0 -Add15_h2f,0,0 
-Relu10,0.166506,11592.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,54.6933,460654 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.192186,11586.5 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.153712,11577 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.162308,11594.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.0575,465877 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.188503,11620.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.163811,11634.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,54.849,462686 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.204007,11616.7 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146029,11630.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.161895,11634 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.4899,166249 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.1684,11663.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.146861,11670.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.4326,237190 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.162688,11632.1 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.5929,102938 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.139847,11645.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0863104,11641.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0836291,11653.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.0489,236606 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.164016,11658.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.142861,11664.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.9672,237033 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.162813,11660.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125462,11660.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.141907,11674.1 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,28.0632,241426 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.165491,11679.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.13993,11666.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.1957,242836 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.167428,11658.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.122228,11651.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.143012,11662.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.508704,11685.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Mul1,0.326176,11672.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.122052,11641.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.34069,12221.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_167.txt deleted file mode 100644 index 3a5e9b50bc4f7e1d36243820a746abfff29e080d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,46.0976,356388 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.196816,10880.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.17264,10882.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,100.932,797135 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.206067,10978.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.164071,10980.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,102.59,802668 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207629,11096.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.149181,11096.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.149274,11096.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,104.309,830819 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.209415,11206.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.180269,11211.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,102.598,826477 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.211831,11248.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.152788,11250.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.156989,11259.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,103.745,844893 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.207379,11399.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.169136,11405.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,103.469,840953 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.209504,11410.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.155722,11416.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.152909,11424.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,36.7554,300738 -Conv8_f2h,0,0 -Conv8_h2f,0,0 
-Add11,0.1845,11522.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.162394,11529.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,53.4419,449755 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.19576,11566.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.8333,167843 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.163091,11575.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.137405,11591.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.13127,11591.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,52.351,443942 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.191978,11616.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.163703,11612.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,54.1502,455838 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.191738,11604.1 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.141879,11606.3 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.163155,11631.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,54.4963,462300 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.189389,11629 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.16391,11636.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,54.407,460769 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.194861,11607.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.151738,11607.7 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.156486,11629 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.6738,166906 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.171543,11656 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.149437,11661.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,27.475,237689 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.166791,11661.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.588,101903 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.146013,11651.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.089053,11651.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0794466,11663.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.8633,236085 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.158327,11659.1 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.139853,11666.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.7566,236799 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.165412,11661 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.12457,11664.9 -Add27_f2h,0,0 
-Add27_h2f,0,0 -Relu17,0.140538,11666.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.8764,239796 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.166035,11674.3 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139789,11668.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,28.0507,241308 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.187427,11651.1 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.12649,11658.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.146099,11672.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.508394,11676.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.287876,11668.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.123875,11651.5 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.35771,13408.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_168.txt deleted file mode 100644 index 80dc433310b114fa54345faac279f4532cf8b4c8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,45.5094,352968 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.200602,10879 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.162103,10875.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,100.172,798998 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212454,11014.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.168288,11022 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,102.026,811658 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218547,11117.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.152218,11112.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.158416,11119.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,103.418,826858 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.211335,11286.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.173642,11292.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,102.951,828657 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.214737,11309.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.156819,11315.3 
-Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.152829,11317.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,103.667,834186 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.211239,11415.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.178768,11419.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,102.79,843458 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.209095,11456.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151837,11447.7 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.155104,11449.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,36.8962,303297 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.188413,11556.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.16584,11566.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,53.9649,453644 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.191456,11568.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,19.9096,167004 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.164685,11595 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.138442,11600.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.131709,11614.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,52.4399,447907 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.188679,11637 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.163651,11648.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,54.0805,454884 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.192717,11621.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.147584,11633.1 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.166903,11654.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,54.4074,445952 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.191408,11679.1 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.16754,11681 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,54.3184,458929 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.195213,11667.1 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.144723,11671.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.160759,11688.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,18.9063,159096 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.167501,11700 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.142128,11698.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,26.87,234233 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.173047,11690.5 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,11.0846,96393 
-Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.140516,11675.3 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0902083,11675.3 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0801635,11675.3 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,26.5137,232066 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.163696,11701.7 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.137037,11703.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,27.3732,237338 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.167344,11685.9 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125175,11682.3 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.148179,11699.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,27.2751,235005 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.164455,11705.1 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.146282,11689.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.4971,235337 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.170714,11693.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.12768,11695.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.144275,11697.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.499677,11699.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.290938,11699.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.124067,11691.7 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.37835,14053.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_261.txt deleted file mode 100644 index 6caeadddfdd40b8638bb3ef555563ea32760ed07..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,30.4317,249183 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.18295,11714.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.171367,11693.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,95.1785,823199 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.193626,11863.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169674,11865.2 
-Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,96.08,823526 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.20386,11990.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.14537,12002.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.161354,12013.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,97.1222,856045 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.197712,12142.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.167699,12152.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,96.027,851055 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.202531,12241.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.154144,12243.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157949,12242.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,97.1949,870765 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.196445,12383.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.1645,12394.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,95.9803,870248 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.194339,12472.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.149552,12467.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.167159,12465.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,28.1835,256585 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.176781,12536 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.16575,12547.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,47.2133,439407 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.171853,12570.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,9.97099,92791 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.149904,12549.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.116605,12551.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.124522,12574.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,46.3887,437112 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.165469,12587.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.167197,12584 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,47.7867,439935 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.172295,12593.3 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.142474,12601.1 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.16168,12612.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,47.976,445621 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.176023,12635.4 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.160746,12641.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 
-Conv14,47.7349,437372 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.173604,12637.1 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.144672,12650.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.168858,12662.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.8447,148487 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.148803,12668 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.13809,12669.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,25.8494,247000 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.143344,12671.5 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,6.35713,63182.7 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.120749,12669.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0672673,12662.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0744546,12675.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,25.6651,247505 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.146397,12652.3 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.137776,12656.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,26.3412,249413 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.15121,12658.1 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.123315,12660 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.144339,12673.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,26.2191,247889 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.146538,12677 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.139453,12673.3 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,26.6016,243341 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.151043,12039.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.121338,12047.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.138048,12049.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.418199,12043.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.251808,12032.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0764609,12018.9 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.24904,12640.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_262.txt deleted file mode 100644 index 
9194ee12f9f9d142886a9915e8705f649b0f80bd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,31.084,264062 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.18576,11814.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.166448,11808.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,94.7327,823938 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.189786,11928.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.168368,11931.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,95.6342,834776 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.187223,12083.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.141821,12091.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.154775,12097.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,96.4715,854397 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.18544,12251.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.166947,12230.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,95.5344,853289 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.186301,12325.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.143562,12323.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.155834,12335.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,96.6132,871318 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.186845,12457.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168592,12465.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,95.5462,858667 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.184093,12558.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.146013,12554.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.157898,12560.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,27.9198,253552 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.168058,12627.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.163808,12623.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,46.8795,437433 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.163354,12648.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,9.84177,92534.6 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.143799,12629.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.120464,12642.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.128582,12660.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,46.1099,434555 
-Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.159117,12666.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.163632,12668 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,47.4097,442133 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.168586,12673.7 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.143165,12681.3 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.164135,12690.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,47.5063,444863 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.165943,12706.1 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.160487,12711.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,47.4321,444154 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.166349,12700.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.141156,12707.9 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.159408,12725.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,15.5918,147182 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.143619,12715.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.141895,12723.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,25.6326,245643 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.140343,12723.3 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,6.12304,64077.9 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.117616,12711.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0659712,12711.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0825504,12719.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,25.4628,244787 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.137773,12717.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.137437,12719.3 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,26.1592,247632 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.14576,12757.5 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.120461,12749.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.138835,12761.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,26.1382,248720 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.139706,12764.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.137284,12768.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,26.3031,252754 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.150208,12779.7 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.124381,12785.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.141296,12787.6 -Relu19_f2h,0,0 
-Relu19_h2f,0,0 -Pool1,0.386602,12791.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.245216,12772.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.0738144,12757 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,0.821626,12757 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_263.txt deleted file mode 100644 index c5025fd8696135af2d5ddcd46afef3c19fa2cfaa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,39.5327,347153 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.183283,12189.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165594,12191.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,137.109,1.23506e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.19592,12410.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.165744,12391.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,138.873,1.25827e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.201104,12559.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.152167,12555.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.154701,12565.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,139.762,1.27147e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.202093,12732.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.165715,12728.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,138.636,1.28659e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.194122,12806.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.144246,12787.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.156218,12791.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,140.121,1.29954e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.196861,12945.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.164419,12951.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.973,1.29785e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.196806,12991.5 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.147357,12995.5 -Add10_f2h,0,0 -Add10_h2f,0,0 
-Relu7,0.155949,13005.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,39.2133,367153 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.172595,13077.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.158445,13085 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,68.5708,663113 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.180727,13114 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.2781,109844 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.150797,13071.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.135869,13079.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.16025,13083.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,68.0671,658180 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.181405,13125.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.161194,13127.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,69.0828,665526 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.177923,13125.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.147206,13123.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.164227,13140.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,69.3119,669416 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.177683,13158 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.1604,13152.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,69.2262,669360 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.178531,13183 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.142541,13184.9 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.157946,13188.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,21.6755,211362 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.15368,13209.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.14431,13204 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,37.4536,361340 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.150858,13221.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.03754,70462.1 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.126621,13179.2 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0792961,13171.5 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0759552,13177.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,37.1024,369906 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.148682,13207.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.137706,13209.7 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,38.1749,373664 -Conv19_f2h,0,0 
-Conv19_h2f,0,0 -Add26,0.15682,13213.5 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.128525,13217.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.146595,13219.3 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,38.2556,375394 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.168535,13217.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.1396,13221.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,38.1804,375556 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.152038,13222.5 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.13,13211.1 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.149703,13217.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.477028,13217.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.304611,13219.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.112051,13219.1 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.30261,14525.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_264.txt deleted file mode 100644 index 951b8c465bdacd5d15f980305c97861b76160712..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,39.323,345738 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.182237,12241.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167891,12237.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,137.261,1.23798e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.197783,12424 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.166653,12432 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,138.854,1.25972e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.197824,12569.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.157556,12557.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.155968,12569.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,139.918,1.28635e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.19745,12736.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.162301,12743.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 
-Conv5,139.255,1.29224e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.199748,12852.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.146711,12846.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.155687,12854.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,140.069,1.31035e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.194759,12990.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.165734,12980.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.89,1.31491e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.194778,13026.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.152538,13005.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.152458,13016.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,39.0674,366522 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.173783,13125.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.165751,13119.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,68.6855,665142 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.173549,13175.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.2636,109301 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.151315,13110 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.13929,13119.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.156579,13127.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,68.1344,662735 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.171584,13175.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.160071,13160.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,69.2405,667781 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.183485,13162 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.143507,13156.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.158371,13160 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,69.5357,671279 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.178115,13188.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.167236,13179.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,69.1784,667827 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.179719,13190.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.145462,13190.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.163782,12523 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,21.7289,208856 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.154314,13200.1 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.139331,13223.2 -Relu14_f2h,0,0 
-Relu14_h2f,0,0 -Conv16,37.6332,372906 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.157072,13246.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.01964,68656 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.134282,13204 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0863424,13196.3 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0791584,13200.1 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,37.1771,370294 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.149443,13242.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.136618,13229 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,38.075,372443 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.15465,13246.1 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.133415,13246.1 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.144989,13253.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,38.3939,374949 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.161687,13234.5 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.143827,13232.7 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,38.1879,373605 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.153891,13264.9 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.124851,13270.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.143171,13272.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.457245,13272.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.287354,13267 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.114048,13236.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.31037,14472.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_265.txt deleted file mode 100644 index 49017013dc5f934f9f238c0a687f8cadf1f64283..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,38.7707,340586 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.17944,12187.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.162285,12192.9 -Relu1_f2h,0,0 
-Relu1_h2f,0,0 -Conv2,136.423,1.22811e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.197392,12401.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.168198,12414.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,138.429,1.22523e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.201869,12598.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.153661,12603.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.157661,12592.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,139.723,1.28525e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.196979,12776.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.177027,12778.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,138.335,1.28467e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.194768,12842.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.15399,12846.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.165734,12848.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,139.253,1.30786e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.196186,13013.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.170624,13005.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.422,1.30939e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.197789,13048.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151229,13052.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.154845,13058.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,39.2862,367799 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.175018,13146.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.167898,13137 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,68.7495,664342 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.179974,13171.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.558,110251 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.148966,13139 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.143213,13125.5 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.152838,13138.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,68.1173,661585 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.174605,13156.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.161184,13160.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,69.2829,669275 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.184016,13167.9 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.154615,13166 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.164211,13173.7 -Relu11_f2h,0,0 
-Relu11_h2f,0,0 -Conv13,69.504,671243 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.180176,13192.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.167926,12536.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,69.2877,663966 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.181718,13198.5 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.146775,13200.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.161718,13208 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,21.7855,210477 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.155574,13217.4 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.143063,13221.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,37.5259,372285 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.157907,13231 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.10395,69331.8 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.130576,13206 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0816704,13207.9 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0799008,13211.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,37.0512,370145 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.151184,13215.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.143306,13217.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,37.9731,370874 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.158128,13209.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.125568,13213.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.146739,13217.5 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,37.9281,371303 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.155536,13209.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.146294,13215.5 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,37.974,372004 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.15208,13215 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.129706,13207.3 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.14801,13211.3 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.446621,13221.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.288829,13225.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.118842,13217.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.30021,13879.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_266.txt deleted file mode 100644 index 595da5c025a8b2d21a8859f7b81759e0b119fa09..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,35.8886,295686 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.189232,11672.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.173853,11670.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,122.434,1.04701e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.202077,11838.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176003,11839.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,125.275,1.04456e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203037,11957.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.152624,11953.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.163437,11959.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,124.858,1.08686e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.204131,12140 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.180992,12147.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,123.823,1.08755e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.201479,12168.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.149837,12174.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.158503,12184 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,124.805,1.10516e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.202618,12362.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.169341,12364.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,123.764,1.10364e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.200304,12399.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.162019,12399.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.162538,12408.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.714,315738 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.179491,12482.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.168704,12471.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,61.0415,557179 -Conv9_f2h,0,0 
-Conv9_h2f,0,0 -Add12,0.179107,12488.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.4739,104720 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.151011,12477.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.141635,12480.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.159606,12494.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,60.0915,551111 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.174803,12528.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.174106,12526.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,61.07,556124 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.185459,12513.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.147242,12519.1 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.166086,12526.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,61.3889,559492 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.183411,12547.7 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.166381,12553.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,61.3062,561112 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.180656,12540.1 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.151719,12551.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.164122,12561 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.9997,183106 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.159094,12578 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.14415,12579.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,33.9728,317208 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.154083,12589.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.08242,65922.6 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.132074,12570.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0984769,12566.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0969025,12568.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,33.3788,316248 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.153856,12602.7 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.140653,12606.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,34.5072,318444 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.159126,12606.3 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.128755,12606.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.146691,12623.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,34.5515,321386 -Conv20_f2h,0,0 -Conv20_h2f,0,0 
-Add28,0.157776,12638.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.152224,12642.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,34.5678,322899 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.15791,12657.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.133059,12650 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.147149,12650 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.471735,12653.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.294282,12653.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.120842,12638.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.34878,14425.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_267.txt deleted file mode 100644 index df31c8040e2bee945a9f413ea6c2fe1bab857755..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,36.5494,304021 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.185594,11614.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.161011,11616.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,122.686,1.05018e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.204803,11767.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172592,11168.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,124.667,1.0649e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.205824,11915.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.151706,11914.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.161552,11905.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,125.785,1.09457e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.20232,12079.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.1696,12083.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,125.033,1.08743e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.201053,12131.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.153542,12120 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.159015,12125.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,125.773,1.10955e+06 
-Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.209597,11642.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.164653,11651.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,124.837,1.10726e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.206019,12300.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.15695,12298.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.160086,12306 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,36.0871,318480 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.184445,12393.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.162842,12389.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,61.4807,560012 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.181046,12424 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.7315,107250 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.159431,11771.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.137757,11775.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.160611,11783 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,60.877,553995 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.181501,12424.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.168326,12429.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,61.4704,552925 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.184474,12426.1 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.147853,12433.9 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.175779,12439.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,61.8327,561909 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.183888,12466.1 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.169379,12469.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,61.4219,560322 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.18993,12466 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.142829,12464.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.166166,12466.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,20.0654,180829 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.160458,12494.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.143213,12496.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,33.8782,316751 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.15537,12521.3 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.02251,66236.9 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.130816,12485.3 -Add23_f2h,0,0 -Add23_h2f,0,0 
-Add24,0.0892032,12494.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0848225,12511.9 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,33.3996,315344 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.150147,12515.5 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.143674,12517.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,34.5209,321187 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.158765,12532.5 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.127437,12538.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.143302,12560.9 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,34.544,321689 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.159181,12587.1 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.143968,12571.9 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,34.5306,320699 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.157373,12584.7 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.128682,12581.1 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.145571,12585.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.469223,12584.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.299597,12577.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.119949,12554.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.36106,13735.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_268.txt deleted file mode 100644 index 0fdf2c3e3f76b6851c39755e368bd4ac9013fbda..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,35.1239,295538 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.182758,11727.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.164346,11729.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,121.773,1.04894e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.197773,11912 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169786,11917.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,123.171,1.06445e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,0.20489,12037.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.150778,12039 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.153834,12042.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,124.012,1.08809e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.206176,12212 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.165619,12213.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,123.145,1.078e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.198598,12259.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.148278,12257.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.161981,12263.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,124.316,1.10887e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.201136,12431.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.168058,12419.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,123.044,1.1028e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.201034,12454.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.14545,12465.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.152358,12469.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.5114,317122 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.180973,12530.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.165341,12534 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,60.6231,554642 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.185914,12562.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.3358,102844 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.149994,12555.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.138832,12558.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.157642,12562.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,60.2884,557059 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.177584,12545.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.166579,12560.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,61.0292,548165 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.184675,12547.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.14408,12549.5 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.167789,12555.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,61.1004,562212 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.181363,12623.5 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.161357,12610.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,60.7832,558608 -Conv14_f2h,0,0 -Conv14_h2f,0,0 
-Add19,0.179795,12610.3 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.138381,12606.5 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.165776,12614.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,19.9309,182130 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.162442,12635 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.145888,12636.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,33.8825,317811 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.156323,12659.7 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,6.96137,64378.3 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.139034,12638.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.114899,12642.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0988992,12646.5 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,33.1405,315642 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.153949,12701.5 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.141235,12680.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,34.2446,322151 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.160592,12690.1 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.124125,12684.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.143011,12688.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,34.4339,323293 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.162896,12686.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.141651,12695.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,34.2692,322060 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.16088,12697.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.127555,12697.7 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.146525,12697.7 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.465504,12699.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.288192,12699.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.120883,12693.9 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.39848,15783.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_269.txt deleted file mode 100644 index 
b7d2ecb6a5df940372f102775172e8c560ffd7c4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,35.9376,303218 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.185997,11713.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.16551,11711.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,121.801,1.04827e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.198381,11889.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.168195,11893.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,123.312,1.06282e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.206211,12016.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.15176,12014.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.158547,12020 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,124.478,1.09033e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.198874,12203.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.176634,12189.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,123.559,1.09049e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.200608,12241.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.148755,12228 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.157936,12237.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,124.433,1.09011e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.19607,12410.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.170048,12406.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,123.773,1.08023e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.199779,12456 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.151613,12450.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.159293,12454.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,35.6177,317720 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.180666,12516.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.16599,12516.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,61.1719,557044 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.180112,12547.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,11.3974,104551 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.152074,12532.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.139946,12541.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.161942,12551.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv11,60.5135,550811 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.179066,12568.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.167898,12562.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,61.1387,556899 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.182877,11941 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.137699,11929.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.161021,12576.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,61.2741,562490 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.180557,12585.7 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.167786,12599 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,61.062,561749 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.181517,12598.9 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.144045,12597.1 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.161789,12600.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,20.1235,182024 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.163299,12627.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.144845,12631.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,33.9154,319167 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.158173,12642.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,7.03865,67488.9 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.131427,12608.7 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.0977376,12620.1 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.0964256,12622 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,33.2802,315729 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.155763,12637.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.138186,12631.5 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,34.4401,320391 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.160784,12663.7 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.12617,12650.5 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.147306,12658.1 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,34.4106,323358 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.156493,12675 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.143901,12667.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,34.4259,322235 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.158794,12695.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.126093,12693.9 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.143821,12703.5 
-Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.459843,12711.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.290442,12686.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.121517,12671.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.37308,15102.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp32_perf_fp32_120.txt deleted file mode 100644 index 2323ecc3dacbed4c5b302a388e99a12e7836a20b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet18_cifar10/resnet18_cifar10_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,222 +0,0 @@ -Conv1,88.955,715067 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.224621,11381.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.16934,11387.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,241.657,2.03478e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.209395,11869.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.14703,11871.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,244.064,2.05992e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.219187,12109.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Add4,0.188809,12120.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu3,0.188102,12127.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,245.988,2.13135e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add5,0.215542,12382.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu4,0.212092,12385.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv5,246.197,2.13653e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add6,0.217254,12354.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Add7,0.202937,12358.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu5,0.196607,12358.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,248.37,2.18167e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add8,0.223836,12554.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu6,0.216799,12557 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,254.441,2.24353e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add9,0.224111,12452.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Add10,0.20151,12453.6 -Add10_f2h,0,0 -Add10_h2f,0,0 
-Relu7,0.195772,12471.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv8,63.85,549473 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add11,0.205199,12553.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu8,0.198284,12549.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,99.6746,865510 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add12,0.216694,12560.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Conv10,24.7638,221065 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add13,0.183145,12567.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Add14,0.167753,12568.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu9,0.169199,12571.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,99.8741,901134 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add15,0.206124,12536.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu10,0.166467,12526.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,97.7606,859026 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add16,0.211369,12457.7 -Add16_f2h,0,0 -Add16_h2f,0,0 -Add17,0.175625,12461.5 -Add17_f2h,0,0 -Add17_h2f,0,0 -Relu11,0.173411,12461.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,100.178,878176 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add18,0.211887,12489.9 -Add18_f2h,0,0 -Add18_h2f,0,0 -Relu12,0.189865,12482.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,97.2912,856454 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add19,0.21182,12419.1 -Add19_f2h,0,0 -Add19_h2f,0,0 -Add20,0.177296,12421 -Add20_f2h,0,0 -Add20_h2f,0,0 -Relu13,0.177728,12428.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv15,35.0712,307328 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add21,0.194115,12497.7 -Add21_f2h,0,0 -Add21_h2f,0,0 -Relu14,0.168073,12507.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv16,55.057,488891 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add22,0.206614,12537.9 -Add22_f2h,0,0 -Add22_h2f,0,0 -Conv17,14.4345,130102 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add23,0.174803,12513.1 -Add23_f2h,0,0 -Add23_h2f,0,0 -Add24,0.146784,12518.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -Relu15,0.173859,12520.7 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,53.9116,481649 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add25,0.197971,12575.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -Relu16,0.161423,12585.3 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,54.2887,491768 
-Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add26,0.200943,12613.9 -Add26_f2h,0,0 -Add26_h2f,0,0 -Add27,0.146553,12615.9 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu17,0.173961,12617.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,54.2638,481821 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add28,0.198764,12663.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -Relu18,0.16174,12649.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,54.2923,489881 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add29,0.202617,12678.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -Add30,0.14694,12674.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -Relu19,0.192633,12682.1 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Pool1,0.593189,12686 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Mul1,0.537061,12688 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add31,0.124991,12688 -Add31_f2h,0,0 -Add31_h2f,0,0 -Softmax1,1.10314,12678.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_120.txt deleted file mode 100644 index 76d15fea328a534e3c1a7e3ea2c179996dbdea76..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,222.258,2.0749e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.442448,26847.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.275467,26862.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.74474,55144.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.81639,35081.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,91.0332,928887 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.280939,27160 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.381214,27666 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.193611,27156 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,143.725,1.4505e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.412452,27500.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.485623,28109.8 -BatchNorm3_f2h,0,0 
-BatchNorm3_h2f,0,0 -Relu3,0.308447,27492.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,115.859,1.15275e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.395543,27672.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.11319,31018.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,121.961,1.24427e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.388675,28054 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.1069,31686.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.285963,28035.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.318866,28046.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,160.515,1.67448e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.39484,28514.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.47889,29346.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.311915,28534.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,147.854,1.54288e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.415806,28745.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.489366,29611.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.304626,28749.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,115.701,1.20405e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.40035,28830.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12104,33492.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.296953,28853.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.335487,28857.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,160.436,1.71285e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.388203,29180 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.467575,30156.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.302168,29161 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,147.717,1.58411e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.420554,29365.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.486832,30396.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.313963,29362.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,116.062,1.23173e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.41594,29403.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10304,34761 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.288517,29400 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.308248,29400.8 
-Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,92.3545,1.00728e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.332414,29511.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.271608,29519.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.274206,29515.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,89.6765,985036 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.385354,29629 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.315198,29632.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.288933,29632.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,65.2853,714573 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.361253,29618.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.703759,31895.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,85.8508,952153 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.343018,29915 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.70612,32257.2 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.27141,29930 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.31845,29918.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,83.7199,938025 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.331915,30140.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.31132,30125 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.281701,30128.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,90.0234,1.00289e+06 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.376228,30205.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.311915,30190.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.294481,30194.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,64.8775,715057 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.36378,30160.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.705289,32583 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.2774,30152.2 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.29829,30148.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,83.082,941347 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.325528,30366.4 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.305522,30355 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.295685,30343.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 
-Conv20,89.579,1.00933e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.383781,30462 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.312107,30462 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.289656,30450.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,65.679,735392 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.352721,30423.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.700783,32971.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.266955,30442.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.311871,30427.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,83.475,950863 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.322143,30556.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.298949,30530.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.284524,30522.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,89.5125,1.01373e+06 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.379492,30576 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.312849,30568.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.289771,30579.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,65.02,732056 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.357079,30545.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.701187,33158.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.268953,30526.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.308959,30538 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,44.9954,522675 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.294277,30595.2 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.233733,30580 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.163084,30580 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,55.8593,644514 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.334571,30663.8 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.314469,30622 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.262501,30595.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,39.7049,459455 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.316766,30583.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.627164,33178 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,54.4462,628014 -Conv28_f2h,0,0 -Conv28_h2f,0,0 
-Add35,0.305176,30754 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.640425,33401.8 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.236959,30761.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.210693,30765.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.9702,560717 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.287179,30911.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.262834,30896.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.207717,30854 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,55.6275,648445 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.334936,30934.2 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.313138,30911.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.279863,30914.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,39.3969,458812 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.302949,30817.4 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.596573,33480.8 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.205784,30794.6 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.175743,30767.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,48.1546,559611 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.291487,30967.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.261042,30956.2 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.217541,30929 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,55.687,650075 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.329124,30924 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.312491,30908.6 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.268952,30908.6 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,39.6102,465238 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.305777,30855 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.59313,33548.2 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.202629,30843.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.169305,30851.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,48.1046,561871 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.295256,30943.4 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.260997,30920.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.215461,30885.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 
-Conv36,55.5951,647114 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.336408,30936 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.302699,30905.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.26341,30897.6 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,39.7209,464708 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.321328,30801.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.629953,33487.2 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.209598,30797.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.169573,30797.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,48.1338,561238 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.292305,30943.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.264645,30932 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.216831,30882.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,55.6049,650375 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.327671,30936.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.272312,30924.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.263237,30905.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,39.8099,461974 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.309188,30825.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.607407,33495.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.216894,30802.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.177676,30806 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.9678,560014 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.28709,30936.2 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.265036,30905.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.218297,30913 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,55.7986,645697 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.327218,30932.8 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.272556,30921.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.257401,30909.6 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,39.8499,459340 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.311985,30879.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.587318,33603.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.214584,30852.4 -Add56_f2h,0,0 -Add56_h2f,0,0 
-Relu40,0.174379,30837 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.0353,319367 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.249739,30909.6 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.187717,30886.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.138041,30898.2 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.8386,509940 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.301963,30963.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.360465,30971 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.232357,30906 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.2759,311413 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.272785,30883.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.624981,34934.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,41.4418,496244 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.282853,31000.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.612125,35085.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.12702,31000.8 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.142144,30997 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,36.6876,435790 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.273567,31100 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.232082,31050.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.147743,31050.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.5691,509513 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.307409,31149.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.285048,31149.8 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.237906,31138.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.4615,316125 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.272042,31070 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.627721,35140.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.140921,31054.8 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.148716,31039.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.6846,436449 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.282334,31161.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.217605,31146.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.148044,31127.4 -Relu47_f2h,0,0 -Relu47_h2f,0,0 
-Conv52,43.3849,483902 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.300376,31165.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.282494,31169.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.234245,31169.2 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.4158,316772 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.278558,31097 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.615042,35152.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.124242,31078 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.135564,31078 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.614575,31074.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.25804,37872.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.135737,31074.2 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.86336,121794 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_151.txt deleted file mode 100644 index 17423a25717e5f1ef4224d5c5c1fdf2d49e757fd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,161.277,1.49722e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.437943,26583 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.245022,26590.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.93526,55943.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.79932,35419.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,86.4092,867761 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221208,26870.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.31276,27399.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.153382,26862.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,71.9345,727264 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.384932,27068.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.480458,27639.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.291487,27071.8 -Relu3_f2h,0,0 
-Relu3_h2f,0,0 -Conv4,116.321,1.16146e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.366949,27352.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.08605,30628.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,127.294,1.29861e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.357188,27822 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.09975,31416.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.274488,27822 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.303947,27810.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,121.623,1.27096e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.340593,28135.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.460567,28939.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.289874,28150.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,74.843,780984 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.389528,28204 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.485681,28230.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.30003,28200 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,117.018,1.21288e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.362577,28472.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.11612,32858 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.282622,28468.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.313067,28472.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,121.697,1.29861e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.338628,28756.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.450673,29649 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.294111,28737.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,75.0919,797139 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.388612,28786.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.489814,29752.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.306109,28779.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,116.416,1.23036e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.382213,28989.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.1032,33986.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.291582,28955.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.299326,28963.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.2068,888263 
-Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.299499,29050.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.272868,29054.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.17011,29054.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,49.1144,537753 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.337585,29130.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.313495,29126.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.290679,29130.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,67.2893,729534 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.322308,29156.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.658723,31266.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,80.8107,884974 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.325784,29418.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.709072,31605.2 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.26444,29419.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.236965,29423.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,63.4299,708703 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.303756,29593.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.297727,29562.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.214969,29577.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,48.5833,535714 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.332677,29573.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.313304,29573.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.275672,29562.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,66.6726,738144 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.322942,29579.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.683464,31833.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.234987,29580.2 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.185554,29580.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,63.3907,713946 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.318301,29726.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.27434,29715.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.211729,29715.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,48.7588,541697 -Conv20_f2h,0,0 -Conv20_h2f,0,0 
-Add25,0.334642,29715 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.276453,29715 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.290021,29726.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,66.9411,746132 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.325509,29750.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.619472,32089.4 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.235423,29724.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.183282,29728.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,62.9943,713105 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.31013,29867 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.273766,29855.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.220639,29848.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,48.6576,542380 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.335166,29866.2 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.273124,29855 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.291678,29859 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,66.7553,745351 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.31619,29880.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.632329,32280 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.23445,29876.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.183461,29872.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,39.8456,450007 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.277516,29947.4 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.201535,29909 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.146265,29917 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,33.1384,385340 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.298629,29915 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.266763,29923 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.20604,29919.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,38.2738,443559 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.287417,29918.4 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.511338,32341.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,50.0473,571299 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.295723,30120.4 -Add35_f2h,0,0 -Add35_h2f,0,0 
-BatchNorm28,0.535844,32600 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.13381,30120.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.154309,30120.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,37.8943,432228 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.275147,30148.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.216997,30122.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.145849,30102.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,33.1145,379146 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.305566,30195 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.27557,30164.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.23877,30103.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,38.3963,443739 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.293425,30085.6 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.523005,32599.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.133797,30089.4 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.145548,30089.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,37.005,423317 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.280523,30206.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.21717,30195.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.149612,30145.8 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,33.089,383848 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.300561,30184.2 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.269573,30177 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.219045,30158 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,38.3927,443904 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.282028,30143.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.522372,32688 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.136665,30132 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.140575,30120.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,36.8716,423474 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.272529,30203 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.208875,30188.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.146035,30161.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,33.0235,384188 -Conv36_f2h,0,0 -Conv36_h2f,0,0 
-Add46,0.310783,30219.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.271762,28690.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.209067,30193.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,38.615,446069 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.287557,30148.2 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.524836,32719.4 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.134003,30148.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.142399,30148.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,36.9047,424024 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.279595,30216 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.212831,30223.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.147193,30185.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,33.0697,384299 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.306758,30224.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.275717,30224.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.219756,30212.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,38.6799,452297 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.298751,30167.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.515383,32769.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.134469,30167.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.142284,30171.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,36.7945,424393 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.275973,30277.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.208985,30254.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.147001,30220.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,33.0679,381838 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.307762,30277.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.265765,30243 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.228114,30212.4 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,38.4138,444998 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.287371,30159.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.52682,32768.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.134489,30174.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.138335,30174.8 -Relu40_f2h,0,0 
-Relu40_h2f,0,0 -Conv44,22.589,271869 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.205234,30181.2 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.145875,30166.4 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.130707,30152 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,26.7038,309479 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.26252,30133.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.196485,30133.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.14885,30098.8 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,22.1396,265728 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.217355,30072.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.547548,33921 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,34.6527,393681 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.273413,30160.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.594492,34032 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.136147,30103 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.147941,30103 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,27.3005,330137 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.202751,30107 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.152057,30107 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.135743,30103.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,26.5471,308632 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.257349,30068.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.199666,30049.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.143832,30022.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,22.234,269683 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.219601,29969.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.548221,33871.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.110355,29965.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.142246,29958 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,27.2951,326937 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.213369,29969.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.160793,29965.6 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.134348,29954.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,26.3678,304418 -Conv52_f2h,0,0 
-Conv52_h2f,0,0 -Add67,0.270353,29923.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.204568,29908 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.152408,29889 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,22.3208,266934 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.22003,29850.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.547914,33695.2 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.110444,29835 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.132806,29823.2 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.592413,29823.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.23316,36239.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.126418,29835 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.87424,118486 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_152.txt deleted file mode 100644 index 65b63cbff6ee7eb4de969593d130343c438ab6d6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,162.322,1.52781e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.436158,26779.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.234085,26783.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.78165,53550.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.80536,35479.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,86.5925,881289 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220376,27068.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.310834,27593.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.158777,27075.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,74.8456,755086 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.376696,27221.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.486884,27800.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.304804,27236.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,117.01,1.17805e+06 
-Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.36929,27490 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.17302,30754.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,128.337,1.31563e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.360267,27979.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.08126,31593.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.261458,27971.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.298366,27956.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,123.913,1.29701e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.342955,28334.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.461482,29089 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.310303,28319 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,77.3257,805207 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.38938,28380.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.466839,29207.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.302655,28372.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,117.397,1.22998e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.363492,28601.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.07876,32926.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.272082,28617 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.309304,28613.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,123.748,1.32248e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.337784,28885.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.454244,29793.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.297989,28874.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,77.7857,827634 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.389624,28950.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.477482,29877.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.29832,28939.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,117.627,1.25466e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.365746,29100 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.08466,34027.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.264901,29119.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.303711,29100.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.4163,905696 -Conv12_f2h,0,0 -Conv12_h2f,0,0 
-Add15,0.294437,29248.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.22012,29252 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.159391,29255.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,49.064,541506 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.328504,29274.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.30483,29252 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.279295,29255.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,67.2289,734384 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.325285,29240.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.66744,31377 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,80.3761,885858 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.324055,29542.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.667843,31702 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.278809,29547 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.238066,29547 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,63.4284,710517 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.303518,29689 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.295275,29696.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.223358,29700.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,48.8434,542391 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.33578,29685 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.310603,29685 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.286219,29696 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,67.1127,747308 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.31882,29655.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.679171,31914 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.243461,29694.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.181772,29702.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,63.2483,715200 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.307575,29882.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.299697,29837 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.216704,29840.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,49.5055,552676 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.33708,29854.6 -Add25_f2h,0,0 -Add25_h2f,0,0 
-BatchNorm20,0.30684,28283 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.284548,28287 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,67.5103,754609 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.320107,29856.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.657501,32191.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.233132,29841.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.197964,29834.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,63.3259,718270 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.297201,29988 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.293892,29961.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.214258,29969.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,48.991,554526 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.334539,29961 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.312036,29949.6 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.284133,29957.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,66.955,750915 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.325503,29945.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.665117,32341.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.229772,29941.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.191116,29933.4 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,39.8798,452314 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.265106,29992 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.226661,29976.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.142649,29965.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,33.1928,382874 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.295128,29998.8 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.297317,29964.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.217862,29949.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,38.4575,446531 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.283333,29903.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.544074,32344.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,50.2538,574126 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.299044,30069.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.570378,32517.6 -BatchNorm28_f2h,0,0 
-BatchNorm28_h2f,0,0 -Add36,0.129746,30046.6 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.147564,30047 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,37.1679,424322 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.277617,30164.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.253343,30161 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.144773,30126.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,33.0095,382176 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.292946,30164.8 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.294923,30153.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.224139,30107.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,38.5521,446750 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.290744,30104.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.571491,32606.2 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.129016,30104.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.141541,30104.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,36.9386,423284 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.269515,30188 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.233753,30172.6 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.14174,30134.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,33.1224,379318 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.299353,30173 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.29516,30165.2 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.226777,30153.8 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,38.3644,441241 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.287377,30162.2 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.592399,32668.6 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.134304,30121.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.140185,30132.6 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,37.0016,423906 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.270276,30226.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.243237,30230.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.145586,30192.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,32.9933,379818 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.293957,30231.2 -Add46_f2h,0,0 
-Add46_h2f,0,0 -BatchNorm36,0.294686,30204.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.223903,30177.6 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,38.4187,444392 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.281348,30143.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.523965,32703.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.130419,30140.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.140556,30144.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,36.7462,423939 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.268798,30231.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.201407,30232 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.141273,30220.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,32.9878,378675 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.2974,30209.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.266462,30197.6 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.230828,30198.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,38.3983,447953 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.284005,30198.2 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.513821,32799.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.129734,30205.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.15061,30190.6 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,36.7534,423099 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.270129,30247.8 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.209489,30251.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.143627,30232.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,33.2255,389628 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.297746,30244.2 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.264063,30228.8 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.217017,30233 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,38.3565,447951 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.285015,30206.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.506621,32819.4 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.128345,30179.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.138111,30187.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,23.4658,271850 
-Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.241042,30202.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.158636,30175.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.130854,30164 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,29.5053,339899 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.261996,30156.8 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.228486,30141.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.152249,30118.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,22.889,272128 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.219589,30103.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.54353,33974.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.5487,403543 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.256638,30222 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.586621,34154.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.130585,30218 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.153317,30206.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,29.1081,332480 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.243006,30283 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.191417,30283.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.134598,30260.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,29.3091,337351 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.268472,30230 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.232293,30230 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.153567,30226.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,23.0443,274171 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.22309,30165 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.538141,34101.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.107986,30165 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.133298,30149.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,29.3233,333412 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.242002,30215 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.188844,30199.6 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.140057,30195.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,29.7913,342956 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.277387,30169 
-Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.241522,30157.4 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.155206,30123 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,23.0371,272128 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.23109,30115.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.546096,34048.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.110629,30100 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.129702,30100 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.576169,30100 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.20041,35364.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.195615,30084.6 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.72265,116436 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_153.txt deleted file mode 100644 index 5b0c6da1e9835f283fdb03412acded28d054a5e6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,154.881,1.39426e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.466174,25471.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.294892,25483.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.66466,61522.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.8784,33553.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,81.8855,800623 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.307423,25782 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.325067,25789.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.171865,25774.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,72.5312,695061 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.386955,25939 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.462193,26491.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.30316,25938.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,112.118,1.06904e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.365476,26266.8 
-Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.10847,29393.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,122.337,1.19766e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.364977,26769.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.08946,30267.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.276382,26811.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.299351,26788 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,121.033,1.21575e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.338123,27131.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.467229,27878.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.29541,27127.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,74.4617,744821 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.398219,27207.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.47084,28000.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.302668,27211 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,112.217,1.12816e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.361803,27420 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.09068,31652 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.283467,27423.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.324094,27442.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,121.17,1.24918e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.338988,27749.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.443281,28650.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.291103,27761.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,74.5518,767468 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.383985,27826.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.475492,28760.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.307218,27842 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,112.049,1.14599e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.363665,28059 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.08552,32891.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.268242,28039.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.30931,28040 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.089,867210 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.295851,28165.2 -Add15_f2h,0,0 -Add15_h2f,0,0 
-BatchNorm12,0.224261,28172.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.159992,28176.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,48.7043,510464 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.331205,28188.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.317279,28203.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.2814,28211 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,64.7854,679390 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.31685,28236.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.647459,30292.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,78.1458,826825 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.317604,28510 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.644643,30631.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.271288,28538 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.245285,28545.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,62.8648,683767 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.294712,28746.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.256907,28738.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.221835,28738.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,48.6925,523576 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.330628,28791.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.309124,28799.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.282507,28788.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,64.5534,692488 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.323869,28781.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.641878,30986.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.254795,28790 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.191698,28790 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,62.7471,687812 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.292856,28890.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.261221,28894.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.218284,28894.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,48.5235,522385 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.330103,28905 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.299639,28913 -BatchNorm20_f2h,0,0 
-BatchNorm20_h2f,0,0 -Relu18,0.278494,28924.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,64.9505,698029 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.330744,28960.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.640349,31245.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.254686,28968.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.191116,28972.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,62.7637,689156 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.297995,29126.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.264517,29134.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.218284,29123 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,48.6746,530442 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.332523,29163.2 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.304709,29156 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.280882,29137.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,64.5648,700755 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.320146,29124 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.629795,31516.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.253054,29166.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.197145,29170 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,39.3902,437784 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.282847,29232.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.195135,29217.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.151717,29191.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,33.2572,374470 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.293092,29228.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.263659,29182.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.212978,29156 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,37.8866,416740 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.278962,29189 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.548023,31588.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,49.2656,548750 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.292216,29341.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.551229,31768.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.146623,29352.8 
-Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.150764,29352.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,37.4703,419227 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.274719,29436.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.212888,29436.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.148544,29440.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,32.9521,373111 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.300337,29466.8 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.267301,29451.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.221157,29409.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,38.0559,421559 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.277624,29424.6 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.630025,31889 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.158854,29378.6 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.153465,29382.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,36.9282,413650 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.282052,29485.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.215326,29489.6 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.148972,29493.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,33.0634,375573 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.30092,29563 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.264696,29516.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.227653,29466.6 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,37.8484,423841 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.28476,29513.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.552183,31966 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.149324,29471 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.152908,29471 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,37.1316,416315 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.283986,29609.2 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.219275,29590 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.147315,29555.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,33.253,380507 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.298034,29574.8 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.264511,29563.2 
-BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.216824,29532.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,37.9441,425753 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.281144,29521.4 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.544509,32023.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.145132,29513.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.148178,29498.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,36.8519,415022 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.267993,29617.4 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.208248,29602 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.147468,29586.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,33.113,376414 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.301438,29579.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.328049,29552.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.219006,29517.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,38.2189,428933 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.282444,29502.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.532042,32028.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.147807,29495 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.144696,29498.8 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,36.9107,413669 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.284325,29602.2 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.215685,29594.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.149811,29571.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,33.1354,377887 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.304427,29583.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.267602,29541 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.215435,29510.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,38.1229,426276 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.277861,29560.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.542455,32131.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.141919,29533.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.14318,29522 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,22.5776,265834 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.205087,29556.2 
-Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.152338,29556.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.138809,29533.2 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,27.414,308877 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.252191,29471.8 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.210462,29471.8 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.147788,29456.6 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,22.3812,264550 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.216171,29414.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.535364,33256 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,34.752,385615 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.268031,29533.4 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.591761,33382.4 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.127071,29525.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.140767,29521.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,27.3634,322090 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.210488,29556.4 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.140217,29548.8 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.132031,29533.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,27.1453,302907 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.256594,29487.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.203961,29487.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.145919,29453.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,22.5727,266023 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.223788,29395.8 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.540617,33248.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.114143,29392 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.138911,29388.2 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,27.3115,320898 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.212273,29434 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.143405,29418.8 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.131839,29388 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,27.2872,302823 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.257631,29357.6 -Add67_f2h,0,0 -Add67_h2f,0,0 
-BatchNorm52,0.211833,29342.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.151666,29323 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,22.5029,264889 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.216281,29273 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.536426,31828.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.106489,29257.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.131033,29238.2 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.571792,29234.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.31415,35662.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.18076,29234.4 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.81389,113120 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_154.txt deleted file mode 100644 index 067c920d312c59f9744c6b44995cba3dc9da1698..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,157.412,1.44002e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.426526,24981 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.238553,24992.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,4.97158,53004.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.63666,33152.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,85.7847,864414 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.251442,26673.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.312069,27214 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.145926,26688.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,74.0809,731025 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.380926,26802.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.472272,27400.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.304971,26829.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,113.196,1.11623e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.36746,27088.4 -Add4_f2h,0,0 -Add4_h2f,0,0 
-BatchNorm4,1.09909,30296 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,123.136,1.24187e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.364293,27520.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.09994,31054 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.272811,27520.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.308165,27509 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,122.748,1.26734e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.345758,27815.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.459882,28574.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.286898,27820.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,76.3162,785326 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.413291,27907 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.49345,28723.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.298609,27931 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,112.878,1.15786e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.379838,28077.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.09535,32336.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.281636,28100.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.306954,28081.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,123.218,1.29815e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.345298,28352 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.448587,29271 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.285061,28386.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,76.3109,799976 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.386641,28459.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.467153,29374.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.299596,28432.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,113.042,1.17779e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.365445,28576.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.09179,33428.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.269726,28585.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.309393,28578.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.3655,887395 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.317713,28696.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.211417,28696.2 -BatchNorm12_f2h,0,0 
-BatchNorm12_h2f,0,0 -Relu11,0.158912,28700 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,48.9766,528817 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.329003,28685.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.316798,28692.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.280562,28692.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,64.8027,694985 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.31427,28712.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.64056,30779.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,78.3311,843330 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.313381,28993.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.626512,31110.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.276158,29016.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.251454,29016.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,62.9651,693034 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.295518,29153.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.256543,29138 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.221964,29153.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,48.4493,529819 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.340977,29168.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.30883,29157.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.275044,29153.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,64.5001,703081 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.313451,29183.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.635261,31407.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.240779,29198.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.192492,29187.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,62.9989,700218 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.304178,29316.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.26213,29294 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.211353,29301.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,49.2558,539968 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.336613,29317 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.309387,29305.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.284017,29317 
-Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,65.2136,715036 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.331486,29307.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.673769,31604.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.232555,29289.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.179698,29297.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,63.0713,702641 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.300715,29427.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.266252,29423.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.222828,29427.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,48.9043,538881 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.334436,29438.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.300997,29454.6 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.278214,29458.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,64.7474,710926 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.313753,29398.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.644713,31798.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.233675,29414.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.194936,29426.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,39.5635,441931 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.283596,29477.4 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.186444,29481.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.140466,29450.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,33.2257,382372 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.30698,29484.6 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.255805,29469.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.203441,29473.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,37.8014,420480 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.277144,29445.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.557737,31872.2 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,49.3701,556516 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.292926,29663.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.519274,32112.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.14069,29632.8 -Add36_f2h,0,0 -Add36_h2f,0,0 
-Relu25,0.148063,29606 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,36.8472,408116 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.277093,28481.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.357189,28489.6 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.147686,28466.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,33.0947,368114 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.294905,29740 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.329503,29724.6 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.225835,29709.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,37.9501,423559 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.28419,29674 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.557891,32122.8 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.160114,29658.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.151301,29662.8 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,37.0226,417085 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.26883,29724.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.210149,29709 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.147206,29705 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,33.1235,376699 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.304235,29705 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.268101,29708.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.2283,29686 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,37.8752,420981 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.28149,29647.6 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.561463,32134.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.157068,29655.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.151455,29655.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,36.9585,408188 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.277784,29765.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.204543,29765.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.144562,29731.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,33.0791,377044 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.297194,29754.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.259218,29727.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 
-Relu33,0.21715,29708.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,37.9043,424006 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.282616,29693.2 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.548387,32211 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.141273,29685.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.138105,29685.6 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,37.064,418788 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.27884,29743 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.20677,29735.4 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.143935,29716.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,33.1562,376862 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.299825,29693.6 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.269265,29701.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.222162,29701.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,37.8704,424139 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.275883,29689.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.561386,32238.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.154866,29701.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.148389,29705 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,37.0441,414478 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.270879,29747 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.210706,29750.8 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.149049,29731.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,33.4192,378709 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.304953,29747 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.267794,29724.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.22981,29701.4 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,38.1146,427383 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.298494,29739.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.553098,32253.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.150943,29712.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.16142,29697.6 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,23.2619,262910 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.243991,29751 -Add57_f2h,0,0 -Add57_h2f,0,0 
-BatchNorm44,0.172358,29751 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.139942,29701.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,29.4479,325576 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.313118,29697.6 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.235096,29697.6 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.157829,29682.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,23.4012,269476 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.247013,29648.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.539818,32215.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.8414,412556 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.256472,29755 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.546928,32318.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.117901,29747.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.14503,29743.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,29.3886,330189 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.240606,29812 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.186482,29808.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.155845,29755 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,29.1706,330761 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.275525,29747.4 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.213388,29728.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.152934,29693.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,23.5147,269612 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.256504,29678.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.555767,33523.2 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.10871,29678.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.139443,29678.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,29.3091,327858 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.235557,29739.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.203141,29724.2 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.135724,29693.8 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,29.4449,337069 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.273938,29705 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.245151,29705 -BatchNorm52_f2h,0,0 
-BatchNorm52_h2f,0,0 -Relu48,0.148031,29689.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,23.349,269193 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.244395,29659.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.538243,33489 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.113638,29640.4 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.133036,29640.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.566122,29625.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.21819,36033.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.129061,29625.2 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.76049,114679 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_155.txt deleted file mode 100644 index 37a7257c20638a86157576ba9927b5aaa0c3ffbe..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,183.655,1.70499e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.456855,26896.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.24339,26873.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.25813,61030.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.67416,35320.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,89.6427,920007 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.234719,27194 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.324184,27723 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.152524,27182.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,89.23,906289 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.406942,27378 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.479761,27948.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.310475,27354.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,126.38,1.27529e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.37779,27665.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.14248,30987.8 -BatchNorm4_f2h,0,0 
-BatchNorm4_h2f,0,0 -Conv5,134.546,1.3891e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.373188,28139.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.11864,31847.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.271109,28139.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.308266,28127.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,137.138,1.44182e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.375184,28505.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.484829,29325 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.3115,28498 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,92.5404,975203 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.404177,28539.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.489238,29379 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.301674,28532.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,126.653,1.33032e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.378743,28758 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.10453,33265.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.277784,28784.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.311582,28754.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,137.615,1.48066e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.370686,27588.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.456746,30026.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.291621,29072.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,92.7001,993818 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.406884,29084.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.486877,30083.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.301214,29099.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,126.857,1.35328e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.379511,29238.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10819,34343.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.27477,29251.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.30197,29263.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,85.9931,937869 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.307563,29403.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.262501,29407.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.209445,29407.6 
-Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,56.3445,617641 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.351352,29411.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.309567,29415.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.29502,29419.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,71.4179,779676 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.335896,29435.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.672188,31613.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,85.2213,936301 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.322757,29714 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.684764,31941.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.26709,29729.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.285112,29728.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,69.5055,777360 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.307467,29888.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.295346,29865.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.273612,29838.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,55.6456,622355 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.354283,29855.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.307979,29859.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.298238,29851.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,71.1005,788790 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.344088,29886.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.67972,32201.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.257502,29890.2 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.268862,29875 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,69.5758,781009 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.309886,30051.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.291429,30052 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.278302,30047.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,55.7293,630570 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.358417,30044.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.302398,30037.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.280664,30048.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 
-Conv21,70.8152,789570 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.331947,30025.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.682492,32437.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.257657,30029.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.260459,30037.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,69.301,786044 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.304894,30165.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.292907,30158.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.271474,30165.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,55.4304,626396 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.343966,30192.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.310162,30185 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.279954,30169.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,71.2128,794468 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.337311,30157.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.679983,32637.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.258974,30166.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.26563,30132 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.3325,482609 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.276421,30189 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.173746,30143.2 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.140543,30112.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,37.025,429270 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.303179,30174.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.291083,30158.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.25411,30147.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,40.5378,462081 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.279902,30127.4 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.59878,32637.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.3163,606454 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.303967,30369.8 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.59624,32910.4 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.149779,30354.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.151398,30370.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 
-Conv29,39.9549,457407 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.267909,30432 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.29644,30432 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.167033,30432.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,36.6152,426784 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.305445,30411.8 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.265861,30411.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.253298,30404.4 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,40.3344,461444 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.286495,30407.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.558589,32982.8 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.168377,30396.4 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.153906,30381 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,39.9332,459379 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.275295,30502.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.238168,30506.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.164415,30471.8 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,36.4597,427336 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.306392,30482.8 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.259096,30483.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.256478,30472.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,40.135,460351 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.274635,30451 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.539401,33064 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.165599,30458.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.145452,30445 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,39.8381,458724 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.276383,30538.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.240191,30550.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.158464,30512.4 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,36.4469,427860 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.310207,30493.8 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.273945,30501.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.253772,30501.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 
-Conv37,42.2629,466832 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.285644,30449.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.534654,33089.8 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.156652,30449.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.142707,30453.6 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,40.058,458736 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.27996,30544 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.243788,30529 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.179628,30472 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,36.7695,429140 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.315858,30475.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.263141,30483 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.257029,30468.4 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,40.2568,460611 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.279065,30434.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.558212,33082.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.171039,30431 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.147628,30423.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,39.7867,457188 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.275621,30567.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.240747,30536.8 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.164396,30506.2 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,36.5835,426645 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.3065,30533.8 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.269969,30506.8 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.253131,30480.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,40.4429,464271 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.279346,30488.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.559511,33128.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.159743,30442.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.140792,30450.2 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,23.5917,274302 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.23493,30522.2 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.165426,30506.8 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 
-Relu41,0.133791,30468.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,28.4656,337130 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.279794,30480.8 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.203634,30465.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.144748,30415.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,23.2664,274717 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.237093,30412.8 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.546628,34418.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.5477,414063 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.251928,30522.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.56863,33220.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.125317,30519.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.140825,30519.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,29.5255,340010 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.23292,30541.8 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.180313,30534.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.146803,30534.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,28.3773,339285 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.278532,30481.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.203046,30462.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.152735,30467 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,23.251,276655 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.236005,30448.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.552611,34492.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.124115,30437.2 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.14535,30429.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,29.257,334906 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.238271,30482.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.192607,30482.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.138751,30432.8 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,28.0083,338484 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.277989,30411 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.199072,30407.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.147059,30384.6 -Relu48_f2h,0,0 
-Relu48_h2f,0,0 -Conv53,23.2564,274690 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.254187,30358.8 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.532086,33021.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.11239,30355 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.134969,30355 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.580304,30339.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.31172,37012 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.165446,30355 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.76924,117418 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_156.txt deleted file mode 100644 index f9f71a75a2e78e34be922ae773bfa9c791c2a3c4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,183.841,1.71106e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.447505,27072.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.23525,27075.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.84951,65216.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.83568,35526.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,89.8907,926980 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.244222,27367 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.330532,27892.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.153331,27351.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,91.3554,928589 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.397432,27500.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.481201,28094.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.293297,27500.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,127.227,1.28517e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.380126,27852.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.11322,31224.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,135.209,1.40165e+06 -Conv5_f2h,0,0 
-Conv5_h2f,0,0 -Add5,0.367435,28219.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.11903,32012.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.272037,28246.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.312255,28265.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,138.705,1.46232e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.375307,28600.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.472489,29416.2 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.296324,28600.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,94.4063,989034 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.401322,28695.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.471633,29519.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.305631,28676.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,127.311,1.33592e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.382903,28864.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12661,33387.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.264179,28868.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.306239,28873.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,139.275,1.49541e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.386826,29186.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.466039,30159.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.304684,29186.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,94.6285,1.01322e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.41301,29183.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.485841,30186.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.300637,29187 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,127.613,1.36217e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.381278,29386 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.12636,34566.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.275724,29362.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.336292,29354.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,87.8895,963427 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.312837,29488.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.282719,29438.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.189074,29442.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 
-Conv13,60.8412,672193 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.357085,29472.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.310352,29468.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.28499,29472.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,73.548,805069 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.327551,29504.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.672323,31698 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,89.3331,990462 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.336407,29853.4 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.681084,32103.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.254277,29822.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.297612,29833.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,72.762,812777 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.310539,30015 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.298385,30000.8 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.278111,30016.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,61.0433,683127 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.355538,30011.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.31283,30004.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.288356,30012 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,73.3172,817841 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.338277,30028.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.683209,32359.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.265445,30020.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.292492,30005.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,72.7418,819128 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.322942,30167.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.303774,30175.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.282187,30182.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,60.631,678941 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.364017,28657.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.30645,28645.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.278111,28653.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,73.3072,820758 -Conv21_f2h,0,0 -Conv21_h2f,0,0 
-Add26,0.337003,30144.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.678677,32608.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.261753,30163.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.26421,30163.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.3336,817532 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.303852,30409.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.263781,30382.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.279262,30371.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,60.9911,690520 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.356503,30398.2 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.282616,30360 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.284019,30352.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,73.3651,825208 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.336298,30372.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.64561,32890.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.256133,30380.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.27276,30365 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.2758,486233 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.271634,30402.2 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.173995,30387.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.145017,30357.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,36.6619,426979 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.309931,30407.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.272978,30369.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.248748,30358.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,40.1366,460069 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.280254,30320.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.562332,32888.2 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.0583,600905 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.301892,30558.8 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.566268,33126 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.165676,30524.2 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.153771,30505.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,39.6696,457791 -Conv29_f2h,0,0 -Conv29_h2f,0,0 
-Add37,0.277062,30555.4 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.244108,30543.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.174348,30540 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,36.6356,430399 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.314059,30563.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.269349,30528.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.251333,30471.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,40.3201,465937 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.284267,30498.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.542653,33088 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.154604,30471.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.142611,30441 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,39.7402,459177 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.275288,30613.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.242481,30620.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.16108,30560.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,36.5743,428505 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.306814,30579 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.263998,30579 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.257918,30532.8 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,40.3354,464741 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.296484,30505 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.551325,33157 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.164127,30509.4 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.154897,30513.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,39.9405,461249 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.281093,30612 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.242578,30616.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.171832,30601.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,36.7744,428966 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.310782,29056.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.25765,29041 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.247243,29010.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,40.3897,464965 -Conv37_f2h,0,0 -Conv37_h2f,0,0 
-Add47,0.290585,30532.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.539595,33199.2 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.152684,30521.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.144799,30521.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,39.7905,459768 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.274194,30628 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.240805,30589.8 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.164262,30574.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,36.4401,427149 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.310365,30578.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.265662,30585.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.255384,30559.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,40.2121,461638 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.288658,30536.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.561917,33241.6 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.164185,30532.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.144428,30532.8 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,40.0179,461323 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.286577,30570 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.245457,30562.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.172946,30555.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,36.7732,430440 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.312637,30575.2 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.26755,30548.6 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.252383,30533.6 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,40.329,464807 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.292203,30492 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.548919,33212.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.157554,30511 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.14229,30511 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.0052,303830 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.207231,30502.6 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.145625,30499.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.1291,30499.8 -Relu41_f2h,0,0 -Relu41_h2f,0,0 
-Conv45,35.0375,405627 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.267736,30515.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.25946,30500.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.172242,30466.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.0376,305400 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.261036,30417 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.566115,34476.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.0182,459987 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.286974,30573 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.602563,34681.8 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.127686,30573.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.144646,30573.4 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.5317,373056 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.247097,30618 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.202796,30618.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.141715,30596.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.312,410153 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.275121,30577.4 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.25475,30581.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.187147,30558.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.039,306435 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.258117,30535.2 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.574249,34644 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.115705,30535.2 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.135147,30531.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.2592,371059 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.236882,30599.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.190277,30580 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.142733,30554.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,35.026,406656 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.267121,30550 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.243339,30546.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.173516,30511.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.0226,306052 -Conv53_f2h,0,0 -Conv53_h2f,0,0 
-Add68,0.265285,30443.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.565577,34549.2 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.112774,30440.4 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.129964,30440.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.559332,30425.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.19279,35903.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.183986,30410 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.72707,117603 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_157.txt deleted file mode 100644 index d83b30ed36748e31285c98b911c2d0f051f16b0b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,184.037,1.74891e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.443012,27344.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.243077,27351.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.1934,60736.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.65401,35365.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,90.7489,929264 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.23653,27619 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.289701,27638 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.170201,27622.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,91.7284,947562 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.406648,27802.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.484952,28411.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.308108,27794.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,127.872,1.29485e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.384727,28082.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13501,31447 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,135.453,1.41071e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.374609,28536.2 -Add5_f2h,0,0 -Add5_h2f,0,0 
-BatchNorm5,1.12737,32314 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.283288,28521 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.308958,28528.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,138.84,1.47588e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.366968,28854.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.469406,29658.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.301509,28847 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,95.2008,1.01004e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.416651,28904.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.471543,29724.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.300472,28881.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,127.64,1.35579e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.37987,29020.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.11009,33543.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.273061,29023.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.325067,29027.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,139.131,1.50154e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.370149,29397.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.468132,30350.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.30277,29362.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,94.9011,1.02157e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.414647,29393.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.472419,30342.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.306379,29335.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,128.31,1.38079e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.395966,29507.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.11566,34712.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.27116,29523.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.324018,29515 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,87.7195,973511 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.318282,29590 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.266244,29574.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.193772,29559.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,61.1538,676838 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.367569,29678 
-Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.318193,29651.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.291051,29662.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,73.4264,808655 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.341751,29682.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.686966,31872.8 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,89.0127,990530 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.341112,29980.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.719394,32223.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.927963,36037 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.291153,29973 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,71.9851,811042 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.306795,30089.8 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.300305,30097.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.273918,30101.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,60.922,686665 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.371102,30097.4 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.319826,30105 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.282437,30105 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,73.4337,820665 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.349451,30063 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.673603,32424.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.273355,30078.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.350481,30090 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,72.3126,817258 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.312568,30299.4 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.26147,30299.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.279896,30288 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,60.7685,685817 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.358328,30295.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.309534,30303.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.28666,30299.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,73.5745,826331 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.339096,30348.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.679562,32809.4 
-BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.273196,30329.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.293163,30317.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.5525,824476 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.312811,30427.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.270135,30435.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.274104,30454.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,61.2862,697751 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.358526,30439.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.3166,30417 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.285343,30409.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,73.6448,831108 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.348503,30416 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.778728,32949 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.295646,30431.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.27875,30424.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,44.9516,517351 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.273887,30497 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.206098,30485.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.15189,30466.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,50.8041,585431 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.315,30500.8 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.267314,30504.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.261637,30504.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,47.2666,549398 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.305771,30480.8 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.611209,33044.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,63.2374,736038 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.31301,30702.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.585501,33293 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.170975,30706.6 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.155448,30710.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.0784,554991 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.285502,30790.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.230347,30779.2 
-BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.156792,30783.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,51.0753,592644 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.317253,30911.8 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.270072,30881.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.259582,30851 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,47.033,554408 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.317163,30842.4 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.551888,33463.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.158744,30819.6 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.15255,30831.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,47.1293,558905 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.290303,30960.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.265464,30968.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.147993,30915 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,51.0673,592923 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.319966,30987 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.266552,30979.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.262852,30949 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,47.0602,557219 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.289369,30937.6 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.605668,33585.2 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.144992,30914.8 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.14478,30922.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,47.3598,560137 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.287717,30987 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.227545,30971.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.148748,30968 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,51.1172,594484 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.325733,31055 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.275045,31005.6 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.256914,31009.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,47.2048,558233 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.287537,30945.4 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.548771,33642.8 
-BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.146091,30937.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.142482,30937.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,47.2641,559373 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.286123,31100.8 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.229521,31100.8 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.153119,31024.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,51.5505,596775 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.334679,31116 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.317009,31127.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.260927,31097.4 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,47.327,559971 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.29964,31070.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.615836,33802.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.149376,31070.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.142489,31063.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.1668,562303 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.289791,31146.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.272671,31131.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.153344,31131.2 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,51.1954,597754 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.333776,31120 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.316363,31131.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.265713,31135.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,47.176,558821 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.295934,31078.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.559991,33840.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.146329,31078.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.140812,31082 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.0182,310885 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.213477,31093.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.147782,31089.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.130559,31089.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,35.141,413296 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.272318,31070.4 
-Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.264689,31059 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.183947,31036.2 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.0937,312512 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.26469,30994.4 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.584111,35088 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.3002,471372 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.285515,31112.4 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.609066,35256 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.130501,31108.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.137804,31104.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.3058,377751 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.229446,31162.4 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.203078,31127.8 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.139257,31101 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.0721,413987 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.264562,31108.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.260287,31108.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.179148,31070.6 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.5144,317268 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.268465,30994.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.579894,35088 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.119564,30971.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.137945,30967.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.1678,378250 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.22988,31044.2 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.192691,31040.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.136056,31021.4 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.988,412461 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.270271,30998.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.253483,30986.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.172108,30926.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.1216,311619 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.273752,30892.6 -Add68_f2h,0,0 -Add68_h2f,0,0 
-BatchNorm53,0.568605,34963.2 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.119564,30862 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.128972,30862 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.590134,30862 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.28456,37672 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.128435,30862 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.84476,119309 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_158.txt deleted file mode 100644 index e2b008c23901b088b1280c581a89c8234995fc71..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,177.887,1.63634e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.433195,26363.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.295205,26363.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.33953,58518.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.66843,34145.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,88.8278,893770 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.228069,26669.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.318456,27214.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.148338,26670.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,88.9684,884484 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.416318,26825.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.483959,27427.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.31468,26819 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,121.7,1.19676e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.382007,27099.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.12918,30394.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,128.357,1.28909e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.372445,27550.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.11873,31183.8 -BatchNorm5_f2h,0,0 
-BatchNorm5_h2f,0,0 -Add6,0.278878,27554.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.318617,27562 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,136.613,1.41016e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.359582,27908.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.470512,28686 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.295352,27939.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,91.951,938582 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.411972,27973.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.480304,28801.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.312747,27989 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,121.65,1.23837e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.405841,28158.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.1231,32583.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.2766,28175.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.335831,28183.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,137.041,1.43943e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.375626,28462.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.463965,29389.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.303831,28447.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,92.4919,953552 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.407895,28512.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.502417,29454.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.318731,28470.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,121.354,1.26669e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.396478,28616 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.11292,33598.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.274649,28643 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.316722,28650.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,85.7043,917592 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.309611,28787.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.306098,28779.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.204434,28772 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,55.356,598359 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.351076,28783.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.323435,28791.4 
-BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.284332,28799 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,67.7671,728263 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.327902,28776.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.681545,30912.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,81.8561,878304 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.333324,29065.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.67736,31217 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.264946,29069.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.290443,29084.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,68.9511,759009 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.303659,29240.8 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.302053,29233.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.274181,29222 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,55.2878,606947 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.362929,29221.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.314289,29206.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.289605,29195.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,67.6555,734937 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.329559,29202.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.685903,31468.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.260734,29210.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.262245,29191.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,69.3951,766491 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.312222,29420 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.270744,29378.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.287359,29336.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,56.0401,617063 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.364241,29366.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.279359,29370.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.286239,29344 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,67.7171,742335 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.329911,29325.2 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.660981,31690.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 
-Add27,0.261893,29325.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.243436,29325.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,68.7807,765743 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.307409,29488.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.260831,29458.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.276344,29466 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,55.5994,617697 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.35715,29469.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.274527,29439.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.286756,29450.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,67.9684,744086 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.327621,29466 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.643491,31895.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.267435,29477.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.25788,29469.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.0915,471844 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.269349,29523.2 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.163085,29481 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.140473,29462 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,36.564,413404 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.312094,29542.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.268018,29527 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.262546,29496.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,39.5747,442848 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.285265,29496.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.580727,31934 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,51.1295,565950 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.293726,29653.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.580413,32152.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.187782,29649.6 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.153835,29641.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,39.8871,446857 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.283499,29787.4 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.248978,29775.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 
-Relu26,0.165996,29756.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,36.524,417383 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.30867,29768.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.266059,29779.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.257279,29749.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,39.5029,445415 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.280012,29741.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.56408,32266.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.159545,29729.6 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.151756,29733.6 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,40.4872,456874 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.29162,29822.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.24579,29826 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.167641,29826 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,36.4926,416681 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.317535,29814.2 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.272741,29806.6 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.262469,29772.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,40.1935,453141 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.324958,29735.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.566973,32295.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.163078,29739.4 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.155326,29739.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,39.9648,449639 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.290092,29827.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.24638,29820.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.168294,29790 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,36.5355,418233 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.317527,29796.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.278052,29781.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.265791,29781.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,39.537,447472 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.294897,29765.6 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.565488,32352 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 
-Add48,0.155013,29750.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.154271,29762 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,39.8897,449917 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.295204,29853.8 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.23029,29808 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.158713,29792.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,36.6149,418511 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.321541,29807.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.26588,29780.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.263481,29769.4 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,39.5218,447389 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.288312,29750.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.563862,32314 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.148448,29735.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.140882,29739 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,39.8227,448320 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.276875,29891.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.246776,29880.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.162086,29884 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,36.352,418849 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.316415,29872.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.269099,29876.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.256485,29849.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,39.4144,448397 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.290315,29823 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.565086,32439.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.149798,29834.4 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.140882,29830.6 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,23.5226,269978 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.236965,29895.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.156082,29880.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.137004,29876.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,29.2506,334447 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.266174,29838.2 -Add58_f2h,0,0 -Add58_h2f,0,0 
-BatchNorm45,0.223263,29838.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.143045,29800 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,23.1934,269074 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.248857,29742.8 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.5436,33656.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.4628,397625 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.258136,29868.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.585308,33767.4 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.12638,29853.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.137017,29853.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,29.1434,328638 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.237393,29960.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.186795,29960.6 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.133214,29907 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,29.303,335380 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.27973,29872.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.220441,29872.8 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.144159,29865.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,23.3877,271183 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.247499,29861.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.546614,33821.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.112639,29846.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.140646,29838.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,29.4813,329859 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.252971,29876.8 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.201963,29861.6 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.154258,29861.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,29.6998,336044 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.279595,29819.6 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.229739,29815.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.152671,29770 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,23.3263,268863 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.249842,29728 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.545891,33665.2 -BatchNorm53_f2h,0,0 
-BatchNorm53_h2f,0,0 -Add69,0.109421,29709 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.129702,29705.2 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.589693,29709 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.17853,34981.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.183544,29705.2 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.8411,114887 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_159.txt deleted file mode 100644 index 71d4f29c81dff8609f20a5f860737044d1f35efa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,178.95,1.64736e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.445469,26509.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.249343,26486.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.23551,56963 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.80194,34749.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,89.6136,903481 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.237195,26819.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.336881,27348.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.150284,26838.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,90.2661,891722 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.39884,26975.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.466648,27558.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.31276,26956.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,121.864,1.20135e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.378122,27213.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.11757,30508.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,129.262,1.31027e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.37068,27661.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.11818,31301.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.282495,27650.4 -Add6_f2h,0,0 
-Add6_h2f,0,0 -Relu4,0.299224,27654.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,138.12,1.42603e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.38099,28004.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.472016,28816.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.30602,28023.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,93.9075,952876 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.408291,28084.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.48696,28916 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.314092,28065.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,122.491,1.26097e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.379704,28251.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12823,32652.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.271026,28225.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.31299,28218.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,138.171,1.45993e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.367133,28565.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.462378,29496.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.295204,28558 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,93.9862,982531 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.406078,28611.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.475434,29588.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.313521,28615.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,122.728,1.2801e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.379076,28704.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.12064,33732.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.271684,28700.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.298533,28696.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,87.5864,943336 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.324734,28787 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.293387,28794.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.18544,28802.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,60.394,652253 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.363378,28848 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.31731,28851.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 
-Relu12,0.286635,28855.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,70.8305,760619 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.336478,28844.8 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.676765,30977 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,86.0779,932212 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.336766,29172.4 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.699401,31354.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.271487,29168.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.307615,29168.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,71.9579,791814 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.315326,29332.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.30165,29343.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.276536,29336 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,60.8314,667645 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.360236,29328.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.309119,29320.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.282699,29336.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,70.6395,769970 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.336945,29343.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.673769,31617.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.26924,29340.2 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.279647,29344 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,72.1388,797935 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.317208,29507.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.303833,29477.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.283333,29492.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,60.3464,665570 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.36117,29538.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.319621,29492.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.290712,29496.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,70.671,771160 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.34435,29481 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.665565,31865.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.256589,29484.8 -Add27_f2h,0,0 -Add27_h2f,0,0 
-Relu19,0.277196,29492.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.1152,803586 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.314015,29618.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.30092,29614.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.278603,29626.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,60.7743,673067 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.358865,29641.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.304389,29641.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.300766,29630 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,70.5842,781178 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.340011,29606.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.706326,32074.8 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.270098,29618.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.261983,29622 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.195,474310 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.268132,29667.6 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.188249,29633.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.139129,29625.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,36.5166,416155 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.30549,29668 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.292786,29675.6 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.253151,29675.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,39.5776,447119 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.29996,29634 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.597469,32113.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,51.2933,570957 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.289547,29870.8 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.645821,32403.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.191001,29878.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.162963,29867 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,40.1829,452606 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.278526,29969.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.254956,29973.4 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.170412,29970.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 
-Conv30,36.3363,420228 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.311231,29985.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.26821,29993.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.258386,29981.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,39.6441,450149 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.291339,29958.8 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.573239,32507.2 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.154604,29947.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.144831,29920.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,40.5696,456602 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.295634,29973.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.245874,29974 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.16439,29958.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,36.2945,420522 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.319058,29962.6 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.263538,29921 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.253163,29909.8 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,39.7472,452460 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.341124,29870 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.567389,32456.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.153478,29862.8 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.15957,29866.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,39.587,448730 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.281087,29996.4 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.246149,30004.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.169196,29992.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,36.5315,418876 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.321093,29991.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.26629,29961.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.257361,29946 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,39.5941,448639 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.281567,29933.6 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.574582,32542.8 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.154501,29937.4 -Add48_f2h,0,0 -Add48_h2f,0,0 
-Relu34,0.146572,29941.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,39.6379,452452 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.278629,29998.8 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.239449,29998.8 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.157145,29998.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,36.4814,422310 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.31004,29998 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.267583,29986.6 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.254885,29990.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,39.5472,448826 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.29164,29963.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.568028,32588.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.150584,29952.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.142399,29952.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,39.8558,451137 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.272004,30039.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.244734,30020.8 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.165535,29997.8 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,36.6778,424021 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.310711,30023.8 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.26261,30024.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.25989,29997.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,39.7759,452455 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.287717,29994.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.565059,32642.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.152563,30006 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.142476,29990.8 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.1332,300041 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.207813,30013 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.144984,30009.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.128691,30006 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.9348,399055 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.272933,28503 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.255666,30021.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 
-Relu42,0.180089,29983.2 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.5154,303522 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.263698,29952.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.568029,32596.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.35,455296 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.277919,30044.6 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.58321,32684.2 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.13006,30013.8 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143826,30013.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.5606,371085 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.239788,30120.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.183615,30101.6 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.135814,30074.8 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.6022,394215 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.275653,30082 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.264139,30047.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.175404,30032.6 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3439,301169 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.253778,30032.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.556323,32664.4 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.117516,29986.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.140172,29987.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.433,369142 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.239486,30086.2 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.201214,30082.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.144537,30048 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,35.0333,402416 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.276101,29978.8 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.255513,29978.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.17397,29978.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.3247,300612 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.25468,29960 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.564028,32631 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.118381,29960.6 -Add69_f2h,0,0 -Add69_h2f,0,0 
-Relu49,0.132915,29960.6 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.576157,29945.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.37604,37941.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.131596,29945.4 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.81582,115814 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_160.txt deleted file mode 100644 index bd610058084cfde81ae8bd5522f782c71fc495aa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,180.811,1.66495e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.434461,26820.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.242584,26832.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.39652,61834.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.81126,35642.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,88.8753,907489 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.234514,27125.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.33219,27658.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.153164,27121.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,91.5815,923378 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.407933,27254.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.470582,27852.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.306903,27251.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,122.833,1.22597e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.403633,27420.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.12989,30722.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,129.745,1.32135e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.379076,27837 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.12334,31477 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.268107,27848.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.300171,27863.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 
-Conv6,137.759,1.42956e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.372881,28160.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.465329,28918.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.302916,28133.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,94.013,968078 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.41544,28183.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.469956,28991.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.309413,28191.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,122.4,1.25462e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.381361,28380.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.11527,32713 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.278411,28389.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.304254,28401 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,138.469,1.45279e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.377572,28665.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.473507,29592.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.287282,28638.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,93.8208,981679 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.409526,28669.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.471421,29626.6 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.300747,28673.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,122.117,1.27401e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.381886,28814.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.12017,33743.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.279212,28773 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.309137,28765.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,86.9685,938565 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.31651,28860.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.295096,28852.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.189177,28860.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,60.3326,650886 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.355115,28917.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.314309,28902.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.286417,28909.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,70.475,756598 
-Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.342878,28914 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.671484,31050 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,85.3623,920484 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.340875,29218.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.640086,31411.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.260504,29244.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.291947,29248.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,71.6881,790987 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.304843,29450.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.257925,29412.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.276664,29397 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,59.958,662105 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.353873,29427.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.271909,29412.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.286943,29427.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,69.4054,763271 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.332958,29420.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.631779,31713.2 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.269374,29405 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.258724,29405 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,71.4036,797477 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.303051,29583.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.262584,29568.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.273131,29549.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,59.7342,658091 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.359044,29584 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.269835,29553.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.282751,29564.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,70.346,771867 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.341105,29511.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.651395,30362.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.260607,28004.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.267992,28012.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.0731,800823 
-Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.309176,29729.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.271172,29718.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.279179,29725.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,60.274,668407 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.351018,29737.2 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.269176,29729.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.293016,29733.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,69.6733,767808 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.336714,29710.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.644956,32182.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.267454,29729.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.256735,29729.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,44.7788,505807 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.275563,29771.4 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.204639,29767.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.142022,29775.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,51.0445,562260 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.316734,29863.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.285528,29848.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.258707,29806 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,45.6316,510400 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.286731,29799.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.580278,32294 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,61.9518,698222 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.300747,30034.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.599926,32563.4 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.191467,30023 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.155768,30019.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.5582,539705 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.292823,30171.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.227557,30160.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.161867,30156.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,50.709,575155 -Conv30_f2h,0,0 -Conv30_h2f,0,0 
-Add38,0.319249,30184.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.267609,30169.6 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.259512,30154.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,45.7716,517697 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.279096,30128.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.574224,32699.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.174585,30113 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.145433,30113 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,46.8376,544443 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.283781,30240 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.214412,30225.2 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.147859,30203 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,50.6623,577137 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.31436,30303.8 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.266424,30311.6 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.251577,30296.8 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,45.9228,519230 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.278909,30255 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.56934,32852.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.179666,30228.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.146393,30216.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,47.0784,544933 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.285554,30374.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.222854,30359.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.152709,30330.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,50.6732,578679 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.315857,30373.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.273548,30369.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.256569,30344.2 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,45.7746,521148 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.27715,30323.2 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.569488,32993.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.173811,30330.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.150284,30330.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 
-Conv38,47.6709,552752 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.296427,30398.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.230423,30398.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.159129,30383.2 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,50.7314,581584 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.325162,30439.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.267608,30439.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.258353,30428.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,46.0305,525439 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.291218,30391.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.575217,33093 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.173823,30376.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.149996,30387.8 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.0194,544943 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.288383,30501.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.223448,30490 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.155475,30474.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,50.6882,584794 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.321521,30546.2 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.270443,30508.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.257157,30497.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,45.7568,523504 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.281439,30480.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.546327,33211.4 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.175622,30468.4 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.140556,30468.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,24.9118,304940 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.210175,30509.6 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.144536,30487.4 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.128287,30480.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.5936,400818 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.266424,30459 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.266981,30459 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.17564,30413.8 -Relu42_f2h,0,0 
-Relu42_h2f,0,0 -Conv46,26.3795,309588 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.265292,30406.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.570391,34488.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.2506,461108 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.28067,30533.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.614173,33269.2 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.138079,29165 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.140287,29153.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.8367,376914 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.239909,30568 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.201535,30537.6 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.152203,30530.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.2247,406414 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.287941,30515.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.266418,30500.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.17854,30462.6 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3405,305257 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.26234,30414 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.580495,34511.4 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.119391,30398.8 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.135698,30383.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.2078,367891 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.229311,30459.2 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.196408,30444 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.138092,30409.8 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.6151,400496 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.268049,30432.6 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.26853,30402.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.175622,30387.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.2198,306581 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.260862,30383.6 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.572611,34481 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.116781,30376 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.127788,30376.4 -Relu49_f2h,0,0 
-Relu49_h2f,0,0 -Pool2,0.556406,30376.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.19609,37159.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.126393,30361.2 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.81717,118923 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_161.txt deleted file mode 100644 index 393a1d43f9c935e9e0c166595c3a969961908f2f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,195.108,1.81519e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.433956,27253 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.248447,27256.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.56069,60508.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.68245,35757.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,92.8657,961447 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.246341,27557.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.353304,28102.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.16517,27550.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,101.276,1.03164e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.41187,27668.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.490878,28281.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.303582,27683.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,135.196,1.36598e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.407741,27882.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13266,31293.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,139.646,1.44712e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.38332,28414.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.14604,32184.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.287864,28391.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.313713,28384.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,146.114,1.54317e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add7,0.372536,28743.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.474852,29595 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.29505,28767.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,104.885,1.10969e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.417706,28755 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.488695,29644.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.303038,28771.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,134.066,1.40621e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.378788,28913.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12182,33515.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.295146,28905.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.328184,28894.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,146.041,1.572e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.376836,29232.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.47919,30201.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.296119,29240.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,104.732,1.11895e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.418819,29202.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.487286,30217.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.309464,29221.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,133.635,1.42558e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.387653,29343 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10995,34547 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.278129,29366.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.314615,29335.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,89.8875,985723 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.329233,29408.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.274905,29415.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.205906,29419.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,64.8379,712536 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.364599,29438.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.320529,29446.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.286571,29435 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,76.2495,821583 -Conv14_f2h,0,0 -Conv14_h2f,0,0 
-Add17,0.343204,29527.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.692476,31716.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,92.6438,1.02244e+06 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.336126,29839.4 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.702646,32089.6 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.275473,29789.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.292702,29774.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,76.3134,851660 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.320133,29960.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.315825,29975.8 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.278495,29971.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.4582,729145 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.365451,29975 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.320875,29944.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.282949,29959.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,76.2076,848956 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.343403,29942.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.694325,32345.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.271698,29968.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.283691,29952.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.777,848897 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.307627,30152.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.306398,30145.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.279672,30088 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.7397,738812 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.369592,30121.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.32835,30129.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.287646,30121.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,76.5729,857258 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.342731,30072.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.707074,32544.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.264498,30091.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.287961,30095.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.7489,852577 -Conv22_f2h,0,0 
-Conv22_h2f,0,0 -Add28,0.320126,30201.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.326148,30189.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.279717,30205 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,65.3377,734122 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.356043,30238.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.31676,30200.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.287429,30200.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,76.4302,856344 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.340702,30193.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.701481,32737.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.264075,30182 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.298616,30205.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.2417,514760 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.289854,30253.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.204082,30250.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.146118,30228 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,51.639,589492 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.312382,30387.6 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.306705,30372.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.257554,30341.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,47.3801,543185 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.296766,30340.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.567389,32927.2 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,63.5269,733494 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.311218,30584.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.582103,33185.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.18062,30522.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.155372,30530.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.296,553746 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.286641,30668.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.237893,30661.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.152697,30641.8 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,51.5303,590393 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.318053,30694.8 -Add38_f2h,0,0 
-Add38_h2f,0,0 -BatchNorm30,0.320356,30671.6 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.259601,30664 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,47.4136,552558 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.2918,30659.8 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.558128,33295.8 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.158073,30648.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.155602,30632.8 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,47.0885,553691 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.285164,30758.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.229561,30728.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.159206,30717 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,51.1918,575795 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.317476,30789.2 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.27125,30796.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.25701,30770.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,47.3481,551140 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.295378,29172.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.556476,31850.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.169285,29172.4 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.154431,29172.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,47.8917,552417 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.290713,30861.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.235839,30861.8 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.164613,30846.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,51.3383,595616 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.318104,30839 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.270501,30842.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.267051,30835.2 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,47.4037,549551 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.291416,30804.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.580829,33498.4 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.161695,30801 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.146879,30808.6 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,47.2533,558207 -Conv38_f2h,0,0 
-Conv38_h2f,0,0 -Add49,0.290175,30872.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.230987,30872.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.154092,30850.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,51.1356,596563 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.312402,30922 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.30723,30895.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.25859,30872.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,47.2816,556614 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.291915,30842.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.555196,33551.6 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.154124,30823.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.143673,30808.6 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.5936,558954 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.29347,30979.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.231653,30983.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.158578,30949 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,51.1534,596072 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.31096,30967.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.276938,30967.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.260133,30956.6 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,47.4954,557616 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.285061,30945.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.559511,33691.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.162923,30918.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.148645,30922.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.147,307792 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.212024,30937 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.152408,30937 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.131871,30906.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.4724,405121 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.269125,30877.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.266245,30873.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.177253,30858.2 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.3295,311095 -Conv46_f2h,0,0 
-Conv46_h2f,0,0 -Add59,0.25173,30843.4 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.57754,34968 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.3556,469204 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.283448,30971.6 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.608348,35095 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.132089,30952.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.147173,30937.4 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.1849,374702 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.235044,30990.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.20373,30975.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.145592,30971.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.5602,407624 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.266455,30956.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.251327,30941.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.172813,30911.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3661,311313 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.266814,30889 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.587753,35013.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.124921,30870 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.142643,30862.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.379,373741 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.253144,30908 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.205752,30904.2 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.152984,30881.4 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.4038,409689 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.275249,30874.2 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.253471,30859 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.169976,30855.2 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.177,309213 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.255499,30813.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.582589,34945.4 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.115347,30798 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.131411,30782.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.581072,30782.8 -Pool2_f2h,0,0 
-Pool2_h2f,0,0 -Mul1,1.22635,36307.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.190278,30786.6 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.80082,118999 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_162.txt deleted file mode 100644 index fb4626bdf9e07cf25b1ca67575ea1754dc11292e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,192.851,1.77804e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.479011,26612.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.291083,26605.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.81974,59212.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.68096,35310 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,90.9513,911523 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.291019,25482.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.386129,26038.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.204562,25494 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,102.102,1.0085e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.414692,27095.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.480548,27689.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.308171,27068.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,134.455,1.33407e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.399825,27398.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.34557,30832.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,138.664,1.41189e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.378116,27868 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.13367,31649.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.265528,27856.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.322885,27863.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,146.3,1.52113e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.383197,28269.2 -Add7_f2h,0,0 
-Add7_h2f,0,0 -BatchNorm6,0.464099,29123.8 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.291512,28304.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,105.171,1.09026e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.626102,29757.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.466179,29189 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.306781,28330.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,134.101,1.38829e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.386884,28544.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.08735,33112.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.271897,28540.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.335992,28544.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,145.958,1.54848e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.378865,28862.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.451722,29831.8 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.29027,28867 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,104.901,1.10299e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.414103,28859.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.473232,29874.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.312261,28875.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,133.38,1.3813e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.383249,29075.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10488,34363 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.275263,29071.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.30286,29048.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,90.463,982056 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.326122,29203 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.260011,29191.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.199768,29164.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,65.2848,708388 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.363716,29233.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.317847,29207 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.284055,29210.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,76.133,824530 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.342418,29260 -Add17_f2h,0,0 -Add17_h2f,0,0 
-BatchNorm14,0.636086,31491.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,92.9708,1.01326e+06 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.337374,29599.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.636726,31900 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.271608,29615 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.3094,29607.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.9321,839184 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.304394,29759.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.260695,29763 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.272869,29778.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.6711,727721 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.365412,29772 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.314136,29779.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.283013,29764.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,76.3715,843024 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.347626,29736.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.685833,32136.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.269579,29756 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.280543,29748.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.8887,843630 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.310846,29977.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.266642,29980.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.275602,29991.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.5386,730208 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.361694,29980.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.315889,29962 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.280895,29969.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,76.5441,848252 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.343064,29923 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.64904,32371.4 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.259704,29919 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.278917,29938.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.6751,848366 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.31491,30110.8 -Add28_f2h,0,0 -Add28_h2f,0,0 
-BatchNorm22,0.291333,30118.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.273503,30118.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,65.5927,738506 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.374788,30095.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.315845,30107.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.279487,30118.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,76.3577,849457 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.368241,30080 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.654159,32636 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.268242,30091.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.289842,30103.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.0053,513316 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.279192,30152.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.192421,30152.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.141931,30133.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,51.3991,584398 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.316184,30243.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.296075,30228 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.252133,30212.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,47.2322,544416 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.292427,30193 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.567843,32787.2 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,63.3416,729243 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.306552,30450.8 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.568925,33071.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.164837,30444 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.153945,30451.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.4376,551935 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.28547,30554.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.225464,30543.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.153439,30543 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,51.2885,587265 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.344139,30622.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.316299,30607 -BatchNorm30_f2h,0,0 
-BatchNorm30_h2f,0,0 -Relu27,0.261413,30592.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,47.3575,548002 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.295147,30580 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.5724,33250.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.157925,30572.4 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.147264,30568.6 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,47.308,555436 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.284984,30652.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.281573,30660 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.162776,30656.4 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,51.3185,595613 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.317982,30746.8 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.306072,30708.6 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.264127,30701 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,47.7061,555775 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.294418,30672.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.573706,33365.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.162495,30668.4 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.144012,30672.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,47.1191,551374 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.282993,30777.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.225099,30785.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.154136,30751 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,51.2538,593829 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.315755,30789 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.298027,30792.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.258776,30797 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,47.584,556042 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.287986,30752 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.550947,33495 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.151531,30755.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.146118,30744.4 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,47.0617,554070 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.280338,30777.6 -Add49_f2h,0,0 -Add49_h2f,0,0 
-BatchNorm38,0.223717,30785.2 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.153798,30785.2 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,51.4819,595343 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.322699,30831 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.307492,30808.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.263103,30796.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,47.4117,555088 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.289918,30774 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.558429,33517.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.145516,30751.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.140901,30736 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.0976,556246 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.283743,30865.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.225432,30865.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.150361,30865.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,51.5105,596858 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.33866,30967.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.301816,30914.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.263673,30895.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,47.3273,556680 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.299359,30872.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.616617,33633.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.144857,30854 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.149798,30838.8 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.0516,308726 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.211608,30880 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.155365,30880 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.138572,30827.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,35.1594,408993 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.260689,30834.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.266424,30823 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.187264,30804 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.1473,309016 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.253355,30774 -Add59_f2h,0,0 -Add59_h2f,0,0 
-BatchNorm46,0.570858,34923.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.0525,466727 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.290341,30887.8 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.605174,35083.4 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.133535,30884 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.148998,30884 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.4357,375446 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.23738,30929.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.195602,30925.8 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.138841,30891.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.2437,409419 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.255954,30857.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.253631,30857.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.186636,30842 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.203,309345 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.25861,30811.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.580131,34964.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.115737,30811.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.136569,30811.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.1955,374537 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.236248,30861.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.193721,30826.8 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.1371,30827.4 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,35.0225,405808 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.258142,30828.6 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.255333,30813.4 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.187666,30798.6 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.1779,308798 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.255858,30756.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.576118,34903.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.118701,30748.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.130361,30725.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.578742,30729.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.23008,36273.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 
-Add70,0.201163,30733.4 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.72888,118760 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_163.txt deleted file mode 100644 index aeaff59555bf4dd4488057ebe107d423e452a728..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,194.17,1.81677e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.471095,26662.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.268471,26665.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.69155,58763 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.91877,35768.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,90.7652,914512 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.298795,27000.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.394622,27537.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.205254,26970 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,101.487,1.01196e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.409668,27141.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.502915,27731.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.317176,27122.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,134.518,1.336e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.387409,27421 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13185,30839.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,138.94,1.40372e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.37715,27898.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.10309,31718 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.282961,27917.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.317867,27928.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,145.963,1.51736e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.383461,28276.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.49185,29119 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 
-Relu5,0.318853,28260.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,105.218,1.08832e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.427901,28303 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.485443,29176.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.321003,28291.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,134.832,1.35957e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.39722,28491.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12655,33097.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.289867,28491.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.335992,28518 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,146.445,1.55361e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.379326,28920.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.470186,29855.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.30901,28879.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,104.74,1.10753e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.413009,28878.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.493054,29893.8 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.319953,28887.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,134.504,1.4184e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.388535,29002 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.12097,34281.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.280741,29013.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.312927,29013.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,89.6769,970715 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.318693,29168.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.266552,29176.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.222482,29187.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,65.4874,708922 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.37139,29184.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.324145,29191.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.288875,29203.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,76.1659,822389 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.340567,29199.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.694005,31438.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 
-Conv15,92.829,1.01024e+06 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.343326,29558.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.686396,31858.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.272716,29573 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.297176,29546 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.9821,838176 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.309234,29787.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.302642,29780 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.281067,29780 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.6662,724643 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.362333,29791.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.323665,29799.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.298039,29791.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,76.1126,839439 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.3462,29799.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.683401,32141.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.275929,29772.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.28997,29776.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.846,849095 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.31347,29984 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.306104,29957.8 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.283838,29965.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,66.0305,737580 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.364945,30023.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.314545,29985.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.290897,29974 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,76.1082,846427 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.351223,29992.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.706172,32487.4 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.276217,30000.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.300684,30008.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.8901,854174 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.317476,30127 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.306673,30135 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 
-Relu20,0.289739,30131.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,65.2449,730228 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.364068,30108.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.305515,30097.6 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.288747,30101.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,76.3106,848727 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.346116,30078.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.709289,32642.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.274649,30079.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.300811,30083.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.8197,519414 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.282437,30124.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.202962,30114.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.143506,30110.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.1664,603932 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.321189,30247.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.302674,30216.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.263454,30202 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,47.9019,544268 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.291192,30201 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.57128,32795.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,64.467,735029 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.301604,28901.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.587612,31502.8 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.203679,28870.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.164018,28874.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,49.2696,557497 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.29077,30588.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.298527,30573.4 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.191788,30558 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.0797,614045 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.328044,30683.2 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.319473,30683.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.262686,30675.8 -Relu27_f2h,0,0 
-Relu27_h2f,0,0 -Conv31,48.1063,553127 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.305259,30651.8 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.603971,33337.6 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.203954,30663.4 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.152531,30663.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,48.6753,564275 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.280581,30724.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.246891,30709 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.183666,30693.8 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.0236,616878 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.319953,30778 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.333387,30751.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.263384,30705.6 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,48.3071,561323 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.288996,30709.6 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.572592,33418.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.187225,30686.8 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.154489,30686.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,48.587,565833 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.277841,30830.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.245004,30834.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.181784,30777.4 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.0612,618927 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.318367,30910.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.305618,30914 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.262636,30906.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,48.1948,560432 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.284606,30850 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.576484,33584.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.185843,30857.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.150163,30846.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,48.7421,569459 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.282002,30933 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.291896,30917.8 -BatchNorm38_f2h,0,0 
-BatchNorm38_h2f,0,0 -Relu35,0.191571,30917.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.1103,620979 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.323359,30925.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.291473,30925.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.270705,30929.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,48.2103,560372 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.290801,30887.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.587753,33652.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.18206,30891.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.143545,30891.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,48.5894,566127 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.284504,31066.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.251953,31043.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.193886,31013.2 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.0382,619712 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.331025,31081.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.335909,31085.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.267301,31009.4 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,48.5999,566878 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.291281,30994.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.577681,33786.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.175231,30967.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.144006,30956.2 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.0729,306800 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.210501,30990.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.152184,30990.4 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.131948,30986.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.2558,406249 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.267992,30971.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.250961,30975.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.171935,30956 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.2583,308937 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.257356,30929.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.576317,35113.8 
-BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.2161,467481 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.278571,31036 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.60317,35208.8 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.133311,31001.8 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.137945,30994.2 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.3775,377089 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.249599,31112.2 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.199616,31062.8 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.148269,31017.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.3474,409987 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.27964,30998.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.253714,30998.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.17093,30986.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3062,310852 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.261752,30949.2 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.583273,35129.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.117695,30941.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.140761,30911 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.2168,375931 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.232209,30953 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.196683,30953 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.139289,30953 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.3335,407522 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.278418,30945.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.262277,30930.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.166648,30915.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.3952,311507 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.267774,30866.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.583516,35061.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.116473,30858.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.134681,30858.6 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.593769,30851 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.22655,37850 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.13328,30854.8 -Add70_f2h,0,0 
-Add70_h2f,0,0 -Softmax1,9.72798,119250 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_164.txt deleted file mode 100644 index 2d2d83329218d03a220a795739867f212389b0ce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,196.677,1.87024e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.476209,26619.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.277675,26608.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,6.19781,72892.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.76525,35157.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,89.5278,900211 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.294744,26920.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.390743,27472.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.205452,26916.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,106.154,1.05704e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.422347,27110.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.482698,27723.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.31603,27118.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,136.814,1.35626e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.404299,27421.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13557,30817 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,140.182,1.42243e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.379813,27902.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.12058,31683.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.285457,27879.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.320971,27887 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,148.429,1.54201e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.382916,28299.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.474064,29142.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.296631,28315 
-Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,109.015,1.12637e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.425053,28342 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.500887,29230.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.30426,28341.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,136.448,1.40933e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.399543,28513.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12568,33105 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.28876,28532.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.316375,28506.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,148.642,1.57655e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.390923,28846.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.469572,29843.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.289662,28863.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,108.169,1.1371e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.418371,28862 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.488554,29885.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.302508,28886 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,136.301,1.43639e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.399812,29028.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.12726,34232.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.288171,29040.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.316862,29044 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,90.0136,973729 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.33123,29142.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.31162,29130.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.213529,29138.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,65.3819,706628 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.372875,29187.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.320721,29195.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.293732,29206.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,76.56,827587 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.343786,29233 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.695081,31468.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,93.1894,1.02024e+06 -Conv15_f2h,0,0 
-Conv15_h2f,0,0 -Add18,0.338359,29538.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.703132,31835.2 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.264083,29538.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.284715,29561.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,76.0143,842060 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.318629,29715.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.268901,29704.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.278788,29708 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.302,719244 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.374111,29751.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.315211,29739.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.290341,29712.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,76.3842,841344 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.349937,29732 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.700502,32109.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.26583,29755.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.280402,29747.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.7709,845864 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.313995,29936.4 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.290219,29902.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.272855,29913.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.287,728406 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.380542,29932.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.317131,29905.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.28444,29889.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,76.2274,844914 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.340734,29916.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.68397,32365.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.261394,29913.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.284312,29913.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.998,849458 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.326501,30103.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.303333,30107.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.269528,30111 -Relu20_f2h,0,0 
-Relu20_h2f,0,0 -Conv23,65.0639,729206 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.369458,30083.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.31539,30095.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.307159,30084 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,76.3686,849666 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.341623,30044.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.701186,32547 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.256984,30025.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.296043,30029.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,46.024,520112 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.276991,30126.2 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.219787,30080.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.145325,30065.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,52.8361,604711 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.321949,30205 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.307429,30209 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.252895,30170.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,48.072,546347 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.290636,30174 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.564842,32753 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,64.5646,735721 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.310693,30403.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.635202,33032 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.231326,30415.2 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.175717,30392.2 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,48.6423,558770 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.285221,30568.4 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.247161,30553 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.178642,30491.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.025,611575 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.324209,30625.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.31955,30591 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.260997,30579.6 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,48.2043,552201 -Conv31_f2h,0,0 
-Conv31_h2f,0,0 -Add39,0.299077,30579.4 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.58257,33234.2 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.18981,30544.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.144927,30560 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,48.6939,560116 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.283659,30678.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.247857,30678.6 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.182604,30663.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.0421,614726 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.33557,30740 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.30391,30732.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.272332,30736.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,48.1014,553523 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.294008,30698.2 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.577411,33395.6 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.183513,30690.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.14597,30690.6 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,48.4972,559746 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.283173,30858 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.253855,30827.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.186899,30812 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.1383,619016 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.332178,30880.4 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.321874,30888 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.258212,30861.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,48.2211,555602 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.294341,30834.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.580708,33566.4 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.190424,30827.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.154386,30834.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,48.5539,560401 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.275449,30971.8 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.263436,30956.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.191954,30926 -Relu35_f2h,0,0 
-Relu35_h2f,0,0 -Conv39,53.1818,619586 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.332549,30986.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.268671,30986.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.255704,30971.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,48.3366,560682 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.300018,30956 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.591779,33713.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.181241,30929.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.154585,30914.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,48.6633,568122 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.27875,31066.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.251985,31020.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.193195,31024.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.1455,624663 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.336222,31081.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.267991,31093 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.25706,31036 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,48.1802,562131 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.293195,31009.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.585968,33816.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.183603,31017 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.147794,30990.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.1268,330154 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.235679,31024.6 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.166124,30994.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.132703,30960 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,42.6636,499208 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.299429,30956.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.284715,30956.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.209054,30941 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,30.7526,369228 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.287576,30922 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.599606,35118 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,44.7328,529385 -Conv47_f2h,0,0 
-Conv47_h2f,0,0 -Add60,0.283288,31043.6 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.603421,35243 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.139148,31028.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.141874,31024.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,35.9803,434790 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.279314,31078 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.198214,31059 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.139136,31055.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,42.597,499959 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.292939,31024.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.265554,31024.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.20087,30994.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,30.6962,369654 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.2883,30941 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.618647,35136.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.126662,30933.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.141925,30906.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.0131,431839 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.273458,31005.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.199551,30982.8 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.142956,30948.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,42.5826,498462 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.313637,30930 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.275902,30930 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.204204,30895.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,30.5259,371689 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.286974,30823.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.588553,35019 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.123884,30815.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.137881,30811.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.663177,30811.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.25459,36407.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.201714,30811.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,10.1198,123736 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff 
--git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_165.txt deleted file mode 100644 index 1b536588b3a9bb1d3c390b99f8694ba3f35a9114..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,188.372,1.71448e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.473155,26155.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.304273,26147.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.99152,63538.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.68727,34007.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,89.1236,881840 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.326973,26468.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.422308,26993.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.240767,26453.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,101.491,994426 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.4217,26629 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.475229,27211.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.303339,26629 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,128.452,1.25271e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.397739,26876.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13777,30203 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,132.541,1.31537e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.380861,27340.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.13031,31002.8 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.284056,27347.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.317233,27332.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,145.481,1.48434e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.387735,27695.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.499287,28504.2 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.294975,27711.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,103.823,1.04714e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.410494,27749.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.476228,28592.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.320049,27738 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,128.284,1.29549e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.39569,27859.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.13118,32328.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.281304,27909.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.323614,27898 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,145.419,1.51675e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.385681,28263.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.467338,29216.8 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.292882,28282.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,103.807,1.0727e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.414878,28294 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.48474,29282 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.314219,28313.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,128.352,1.32989e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.385592,28414.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.1282,33438.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.274436,28392 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.308824,28400 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,89.2635,948562 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.33171,28584 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.295217,28576.2 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.209253,28557 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,64.5101,686757 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.366999,28656.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.318366,28645.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.291032,28645.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,72.3554,768390 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.343646,28615 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.681039,30789 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,89.1322,959610 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.343556,28929 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.682837,31141.8 
-BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.272517,28948.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.317374,28959.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.5839,821413 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.321937,29180.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.305092,29150.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.276599,29165.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,64.5872,703058 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.370341,29184.4 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.305062,29188.2 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.281272,29180.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,72.6249,778701 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.339358,29192.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.689993,31454.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.252185,29165.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.290046,29169.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.2174,827532 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.3145,29374.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.306948,29363.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.281905,29355.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,64.7671,710500 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.37162,29390 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.323448,29367.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.284018,29359.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,72.5368,789943 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.34243,29367 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.69204,31767 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.269125,29374.8 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.286622,29352 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.356,827700 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.32291,29565 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.298994,29538.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.278015,29503.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,64.6655,710219 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.369829,29523.2 
-Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.319186,29530.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.28476,29538.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,72.8163,798187 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.353181,29488.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.705097,31972 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.265644,29515.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.298751,29523.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.0911,503591 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.281752,29576.6 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.249989,29576.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.145644,29534.6 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,51.5279,573066 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.315909,29634 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.267244,29634 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.255909,29603.4 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,46.1579,513194 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.291653,29664.4 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.574384,32140.2 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,62.2278,696063 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.301413,29844.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.577495,32385.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.19797,29840.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.161215,29844.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.0515,539890 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.2881,29997.4 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.230546,29982.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.156601,29986 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,51.2828,576633 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.320779,30053.2 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.266961,30061 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.26389,30053.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,46.3387,527558 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.291461,30025 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.565687,32611.4 
-BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.161196,30028.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.149298,30028.8 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,47.1128,544105 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.287774,30127.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.225566,30135.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.153247,30113 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,51.2804,582201 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.317784,30160.6 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.267724,30133.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.259391,30137.6 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,46.2918,529324 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.283877,30082 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.540048,32691 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.153772,30085.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.149535,30089.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,46.8508,541364 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.294667,30269.4 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.230718,30253.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.158949,30222.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,51.5104,580771 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.332625,30299.8 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.26924,30299.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.261963,30285.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,46.3933,529648 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.288082,30235.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.557968,32921.4 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.159653,30243.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.149817,30243.4 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,47.1085,546242 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.288529,30283.8 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.221574,30283.8 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.149664,30253.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,51.2349,585475 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.316498,30368.6 
-Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.27082,30372.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.256293,30314.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,46.4799,537067 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.300236,30300.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.560516,33009.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.146003,30269.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.148434,30255 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,46.9077,546948 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.28963,30367.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.212139,30371.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.146431,30352.8 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,51.3607,585525 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.318878,30435.2 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.263941,30420.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.256395,30424.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,46.4972,533782 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.318559,30361.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.567991,33085.4 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.165996,30346.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.160313,30350.6 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.2471,303601 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.210079,30360 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.14661,30344.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.131001,30337.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.9345,400778 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.268818,30346.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.259998,30285 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.18213,30285.6 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.4139,306926 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.256146,30274.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.562986,33013.4 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.2054,459381 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.286482,30421.2 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.619344,34530 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.130047,30421.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.144262,30421.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.2646,368114 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.244337,30474 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.195404,30473.6 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.136678,30455.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.0359,400855 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.273394,30447.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.268907,30451.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.186054,30428.4 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.346,306268 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.254462,30410.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.581475,34500.2 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.114943,30391.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.139059,30376.2 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.1895,367568 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.234072,30429.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.20348,30429 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.137522,30395.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.8227,401303 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.269688,30375.2 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.273508,30344.6 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.188971,30310.6 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.4224,308667 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.256357,30299.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.574813,34362.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.114502,30300.2 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.13255,30300.2 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.575638,30300.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.22007,35714.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.206911,30300.6 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.78289,117079 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_166.txt deleted file mode 100644 index 36738891fb7bdaf914c3df108c203d821697398e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,186.531,1.73641e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.472995,26172.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.296203,26176 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,6.19001,73155 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.73582,34498.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,87.1731,857608 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.317855,26521 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.407838,27023 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.216542,26482.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,102.118,1.00207e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.422244,26643 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.468829,27240.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.313009,26654 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,129.491,1.26462e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.388964,26871.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.13265,29518.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,132.837,1.32714e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.38195,27347.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.10837,31048.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.288165,27347.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.32444,27355.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,145.465,1.48396e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.380254,27714.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.457232,28527.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.299646,27738.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,104.739,1.06261e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.422225,27753.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.48312,28592.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.318654,27788.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,128.754,1.31054e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.38698,27925.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.10193,32413.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.275922,27944.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.3259,27936.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,145.581,1.51079e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.380458,28271.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.464489,29198.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.309118,28275.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,104.308,1.07686e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.411249,28306 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.472208,29267.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.303307,28290.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,128.739,1.32451e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.385213,28399.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10737,33443.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.285868,28392.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.317791,28393 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,89.453,943521 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.316389,28522.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.325982,28519 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.226008,28519 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,64.9095,691356 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.361393,28583.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.31939,28591.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.292069,28614.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,73.07,776023 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.339998,28626.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.687497,30804.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,89.8433,962539 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.332779,28929 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.700239,31149 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.287896,28929 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.318654,28932.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.8146,823152 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.315422,29138.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.298263,29127 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.31722,29134.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.0815,706067 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.369649,29119.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.313534,29123.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.293265,29127.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,73.0975,788785 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.347544,29104.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.702179,31431.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.272088,29112 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.303992,29100.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.1378,818091 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.312831,29313.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.298264,29283.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.276459,29283.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.2638,710267 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.370187,29333 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.307717,29279.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.284728,29295 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,72.9903,789893 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.336656,29336.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.685218,31740.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.280985,29329 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.306988,29298.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.3961,826425 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.321169,29477.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.304811,29485.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.283812,29492.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,65.0191,714935 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-Add29,0.362769,29492.2 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.311806,29496.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.288383,29488.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,73.2684,799801 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.346448,29461.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.693301,31910.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.268588,29446.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.303103,29435.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,44.9567,502652 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.279147,29587.6 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.220191,29591.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.143871,29557.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,51.3347,569372 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.306546,29587.6 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.293592,29591.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.262104,29576.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,46.1498,509793 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.2878,29603.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.613162,32086 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,62.5655,696593 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.30636,29840.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.631151,32408 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.196825,29829 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.160723,29829 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,47.1387,539387 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.28378,29979.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.259077,29945.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.15605,29912 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,51.2383,577583 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.315172,30036 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.302309,30024.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.262629,30013.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,46.2642,519585 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.275941,29959.4 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.607644,32549.6 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.177874,29974.6 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.152684,29959.6 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,46.981,541843 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.275115,30103.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.255705,30092 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.148959,30073.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,51.1828,581314 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.316741,30136.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.30243,30140.2 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.267153,30106.4 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,46.6132,524526 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.289227,30084.8 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.6156,32724.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.174668,30096.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.154962,30096.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,47.0275,544213 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.285291,30235.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.282213,30228.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.156492,30198.6 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,51.4135,582294 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.315224,30258.8 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.303787,30247.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.261234,30236.6 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,46.4337,526768 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.291122,30218 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.604528,32896 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.168082,30210.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.147993,30210.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,47.1397,547582 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.295639,30299.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.259371,30296.4 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.152716,30300.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,51.1781,582560 -Conv39_f2h,0,0 
-Conv39_h2f,0,0 -Add50,0.311326,30349.8 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.299928,30342.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.272689,30331.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,46.4606,526539 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.282366,30263.8 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.610249,32968.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.173509,30270.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.149471,30275.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,47.0671,544470 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.291358,30406.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.285246,30364.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.170757,30338.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,51.1047,582816 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.309451,30432 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.301649,30417.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.258238,30402 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,46.442,521959 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.296433,30369.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.623152,33112.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.182432,30388.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.157324,30396.2 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.1297,305493 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.215115,30418.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.19477,30403 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.131859,30373.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,35.1078,402907 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.266361,30362.8 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.25276,30362.8 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.192268,30337.6 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.5895,307234 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.25669,30304.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.618455,34386.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.2912,459228 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.269497,30377.2 -Add60_f2h,0,0 
-Add60_h2f,0,0 -BatchNorm47,0.644354,34463.2 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.130911,30373.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143628,30361.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.3754,373876 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.2286,30469 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.183007,30469 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.136134,30446 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,35.0472,403166 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.279384,30442.4 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.313585,30427.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.185727,30385.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.4644,306032 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.252465,30359.2 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.603957,34399.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.118143,30355.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.139922,30356 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.2744,371834 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.233022,30389.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.251192,30389.6 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.14085,30375 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.7799,401179 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.266322,30386.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.296485,30386.4 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.184664,30363.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.4061,305656 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.257669,30332.8 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.612023,34342.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.117856,30302.2 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.133714,30298.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.579619,30299 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.22218,37033 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.129746,30299 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,10.0076,118690 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_167.txt deleted file mode 100644 index 023a48c70f216caab996a5f7754998e9daee720f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,185.747,1.72403e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.471019,26141.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.26005,26149.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,6.07212,68952.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.69734,34508 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,88.6917,884746 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.265528,26457 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.370738,27031.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.202348,26498.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,101.997,1.00071e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.41624,26636 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.479319,27191.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.3089,26597.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,128.724,1.25154e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.392318,26822.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.1251,30137 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,132.441,1.31389e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.376164,27260 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.12227,30923.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.271704,27290.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.310385,27286.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,144.94,1.4778e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.377586,27684.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.46849,28489 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.298512,27692.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,105.018,1.0657e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.430609,27711.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.473853,28550.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.310654,27726.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,128.544,1.29726e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.395645,27902.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.12197,32353.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.27923,27921.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.313591,27902.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,145.157,1.51043e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.379575,28217.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.459991,29167.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.312914,28202.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,104.785,1.07461e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.418263,28195 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.479626,29171.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.316997,28221.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,128.35,1.32167e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.385144,28369.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.10238,33424.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.27699,28373.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.306655,28373.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,89.3076,940678 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.363455,28549.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.315364,28523 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.231301,28507.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,64.8025,691325 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.366897,28569 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.310392,28569 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.283762,28572.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,72.495,768189 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.345156,28580.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.689026,30720.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,89.2446,942883 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.337899,28868.2 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.704893,31069.2 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.271442,28879.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.297202,28887.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.3221,816187 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.307319,29070.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.29477,29085.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.277426,29077.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,64.9142,699353 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.365962,29100.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.317477,29104.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.285631,29104.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,72.4936,785355 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.337707,29097 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.694115,31393.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.273796,29112.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.283633,29120 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,75.1319,822406 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.325278,29253 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.308408,29256.8 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.279467,29268.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.1364,708002 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.363249,29268.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.305189,29241.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.297003,29245.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,72.6949,789732 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.334917,29234 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.701423,31611.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.266379,29253.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.272082,29238 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,74.8667,824538 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.315851,29412.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.288152,29420.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.274187,29420.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,64.6983,710583 -Conv23_f2h,0,0 
-Conv23_h2f,0,0 -Add29,0.375128,29424.4 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.309842,29428.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.287973,29428.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,73.0753,796689 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.345035,29378.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.688413,31815.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.269138,29397.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.305291,29401.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.7878,504237 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.277157,29462 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.214104,29465.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.151519,27943.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,52.7513,592082 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.31692,29588 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.300306,29550 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.256153,29534.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,46.5979,529228 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.297086,29584.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.591689,32067.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,63.8081,704183 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.307607,29783.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.629724,32308.8 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.184102,29779.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.152985,29783.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,48.4794,540939 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.275365,29909.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.291256,29894 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.198367,29879 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.0634,600325 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.32613,29984.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.296703,29961.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.261643,29965.2 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,46.6864,534998 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.291966,29865.8 -Add39_f2h,0,0 
-Add39_h2f,0,0 -BatchNorm31,0.602704,32421.8 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.156888,29874.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.151013,29874.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,48.6048,547595 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.276516,30055.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.291294,30063.2 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.181881,30029.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,52.6883,601634 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.318929,30081 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.304158,30081 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.263404,30073.6 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,46.7287,538758 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.300791,30031.6 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.611485,32648.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.153561,30031.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.149715,30039.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,49.0322,554821 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.277599,30148.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.280415,30126.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.181196,30107.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,52.8603,603790 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.320542,30233 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.293023,30206.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.269215,30175.6 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,46.925,540495 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.287019,30115.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.634186,32748 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.155859,30085 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.145651,30088.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,48.6358,547277 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.279313,30271.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.279646,30275.4 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.188864,30255.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.0178,606910 -Conv39_f2h,0,0 
-Conv39_h2f,0,0 -Add50,0.333694,30318 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.306481,30278.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.264049,30283.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,46.947,545247 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.294232,30212.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.616201,32890.6 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.156313,30216.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.151301,30216.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,48.6415,555286 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.27646,30347 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.303135,30335.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.18398,30317.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,52.8918,608811 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.319307,30351.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.296177,30359.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.264017,30340.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,46.8842,543660 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.292664,30303.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.596572,33016.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.155877,30307.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.146079,30292.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.1613,304601 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.201387,30342 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.200901,30338.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.132159,30319 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.9671,400138 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.261586,30318.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.253177,30319 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.18231,30300 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.4331,306582 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.261829,30242.4 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.617053,34244 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,40.356,459774 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.275997,30310.4 -Add60_f2h,0,0 
-Add60_h2f,0,0 -BatchNorm47,0.642492,34312 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.135647,30295 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.139512,30310.4 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,32.2942,366911 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.239615,30425.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.254335,30410.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.13607,30375.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.9716,401133 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.281458,30371.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.332223,30357.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.181638,30338.4 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.4197,305544 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.259352,30284.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.611081,34286 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.117739,30265.2 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.145567,30261.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,32.298,372522 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.234175,30334 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.227308,30334 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.135481,30281.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.8162,400368 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.266527,30307 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.261023,30276.4 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.183808,30261 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.5108,308139 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.260242,30250.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.614461,34255.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.112313,30238.8 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.1288,30224 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.57665,30224 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.23413,36922.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.128595,30227.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.89324,118435 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_168.txt deleted file mode 100644 index 22ccbe9e1149bf3c0fbf39844e932b17acfcb7cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,188.566,1.72179e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.476157,26048.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.277209,26067.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,6.07835,71442 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.74726,34453.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,87.8974,858826 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.293349,26414.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.393579,26950.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.21299,26414 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,103.891,1.01659e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.422737,26536 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.491523,27122.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.303857,26535.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,129.64,1.26051e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.395896,26758.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.12807,30100 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,132.752,1.3201e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.389463,27282.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.12683,30922.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.282873,27271.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.308677,27259.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,146.611,1.48933e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.391928,27645.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.491434,28458 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.291953,27657.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,106.788,1.08451e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.412445,27661 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.485911,28480.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.302098,27661 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,129.827,1.31423e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.406795,27925.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.13064,32406.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.276991,27887 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.320983,27894.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,146.885,1.51999e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.387735,28206.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.465821,29141.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.318264,28202.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,107.088,1.08664e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.411025,28210.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.477092,29195 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.302456,28226 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,129.694,1.33681e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.386973,28368.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.11293,33424 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.278412,28342.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.309688,28354.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,88.672,937616 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.338763,28507.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.306687,28504 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.211519,28511.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,64.9858,691893 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.368126,28557 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.330405,28545.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.286629,28553 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,72.428,760826 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.354264,28534.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.691458,30704.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,88.6422,948825 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.338571,28833.8 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.697097,31034.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.274552,28833.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.297246,28841.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,74.6706,814054 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.31804,29070.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.296645,29059 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.287986,29043.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,65.3855,705922 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.374193,29104.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.314571,29059 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.284408,29051.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,71.7419,768530 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.335006,29070.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.681648,31363.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.27397,27590.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.298001,27579 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,74.6253,821510 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.321175,29253 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.269944,29256.8 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.276108,29260.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,65.3408,710892 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.372292,29264.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.281131,29253.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.287416,29264.6 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,72.0366,778906 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.341937,29215.2 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.674518,31622.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.260031,29230.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.286456,29238 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,74.6093,821774 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.317183,29386 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.269707,29393.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.285002,29397.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,65.2614,713076 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-Add29,0.368433,29389.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.277072,29386 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.28531,29378.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,71.9477,781421 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.335922,29371 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.651632,31808.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.267973,29329.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.289764,29336.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,45.4726,481415 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.287691,29447 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.200985,29420.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.150092,29405.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.2107,592672 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.32549,29576.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.279749,29580.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.269662,29553.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,46.6923,522682 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.290303,29519.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.579369,32025.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,63.1366,704768 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.308849,29714.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.593833,32231.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.199467,29698.8 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.174399,29702.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,48.305,541769 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.290411,29875.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.256363,29875.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.181797,29860.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,52.7721,584470 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.322449,29882 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.279621,29890 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.253496,29870.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,46.7327,527628 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.290014,29820.8 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.573135,32422.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.169765,29836.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.144825,29836.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,48.1901,540975 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.277035,29992.2 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.255377,29954.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.188799,29954.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,52.7753,600403 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.322046,30014.6 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.265637,30018.6 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.264363,30011.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,46.8435,526901 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.309361,30002.2 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.605398,32630.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.209478,29990.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.156985,29990.6 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,48.1863,546523 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.283237,30112.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.260235,30097.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.181688,30066.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,52.7346,603205 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.327588,30191 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.269496,30191 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.256793,30175.8 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,46.5578,526391 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.328593,30146.6 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.580381,32817 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.187378,30139.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.15941,30139.6 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,48.2664,547531 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.282457,30240.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.253528,30209.8 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.193323,30214 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,52.7063,606921 -Conv39_f2h,0,0 
-Conv39_h2f,0,0 -Add50,0.33267,30259.6 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.27267,30262.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.258763,30252.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,46.5046,534309 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.287179,30203.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.571075,32904.6 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.163417,30199.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.151884,30199.6 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,48.3282,550677 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.28186,30335.8 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.255992,30305.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.186604,30298 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,52.853,611652 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.325215,30397.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.273682,30396.8 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.253023,30351.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,46.492,539149 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.292088,30322.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.559888,33050.4 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.161836,30311.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.144857,30299.6 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.3874,327502 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.235115,30345 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.171104,30329.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.134655,30299.8 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.021,489304 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.297919,30288.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.27731,30292.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.202142,30288.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,30.6324,364050 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.27772,30235.4 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.588975,34340.2 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,44.8337,517828 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.286155,30387.8 -Add60_f2h,0,0 
-Add60_h2f,0,0 -BatchNorm47,0.614268,34504.8 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.145157,30384.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.14967,30377 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,36.8878,431836 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.284741,30452.2 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.207519,30433.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.147077,30395 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,42.6788,489156 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.291198,30395.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.281867,30365 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.207058,30350.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,30.8938,363335 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.280722,30289.8 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.611113,34406.2 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.122841,30278.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.144153,30278.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.2201,424233 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.285489,30330.8 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.207006,30316 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.146872,30281.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.0324,492689 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.291787,30294.2 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.268107,30294.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.203973,30286.6 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,30.8337,356518 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.288017,30226 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.597366,34315.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.120351,30191.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.134489,30187.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.635638,30187.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.21791,37035.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.12871,30187.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.91135,116665 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_261.txt deleted file mode 100644 index 45af9a24777fe9eaecf431e5a0d510e295491e31..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,127.839,1.1273e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.390557,25144 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.309445,25132.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.74866,58302.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.72059,32929 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,74.46,711929 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.237394,25502 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.324882,25505.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.189132,25505.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,99.7869,948743 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.326603,25778 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.361144,26318.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.200486,25739.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,47.8934,455092 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.27861,25800.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.887042,28675.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,59.7064,572686 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.288888,25987.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.912929,29057.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.141235,26002.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.161388,26022.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,135.055,1.32036e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.307807,26435 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.405905,26442.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.28835,26481.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,101.363,992393 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.355832,26630.4 
-Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.468689,27373.6 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.305099,26618.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,48.2533,468940 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.295826,26660.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.899457,30410.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.133382,26645 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.165068,26653 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,134.672,1.30908e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.315544,27066.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.398058,27043.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.277278,27043.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,101.475,1.01798e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.356894,27158 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.459325,27993.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.303762,27127.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,48.2293,478325 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.31059,27147 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.898324,31506.8 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.132204,27140.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.16446,27152.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,79.656,814898 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.27118,27297.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.198412,27297.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.151481,27301.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,60.0469,613749 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.310449,27427.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.265694,27400.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.295269,27412.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,28.0435,290475 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.25594,27328.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.482314,29181.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,46.4545,477555 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.279032,27568.4 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.545572,29468 -BatchNorm15_f2h,0,0 
-BatchNorm15_h2f,0,0 -Add19,0.131814,27587.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.159692,27591.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,68.7911,720711 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.281758,27808.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.253868,27778.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.208249,27782 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,59.9206,624912 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.309457,27885 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.300414,27873.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.280287,27889 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.7967,291597 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.266194,27843.2 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.434398,28858 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.122822,27858.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.15829,27846.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,68.8088,726603 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.282027,28046.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.255922,28034.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.204447,28019.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,60.0078,631663 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.313067,28084.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.296215,28092.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.281343,28100 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.8042,287104 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.278226,28065.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.496516,30160 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.125311,28077 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.154796,28084.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,68.9851,734403 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.281355,28252.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.256722,28252.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.210597,28267.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,60.0106,636585 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.301823,28316.8 -Add29_f2h,0,0 -Add29_h2f,0,0 
-BatchNorm23,0.304267,28286.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.28035,28298 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,27.5868,290160 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.265406,28240 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.503703,30441.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.12485,28259.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.154482,28275.4 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,38.9478,425707 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.221689,26868 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.141222,26852.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.133574,26849 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,42.3895,454461 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.296184,28305.8 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.260588,28305.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.227627,28305.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,19.4529,221173 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.222508,28255.4 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.457092,30464 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,45.522,483025 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.298673,28366.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.639824,30616.8 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.217945,28370.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.178559,28374.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,40.6044,438384 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.254968,28484.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.189906,28469.4 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.142437,28450.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,42.1817,457021 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.298104,28517.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.29011,28502.4 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.22421,28452.6 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,19.2937,218164 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.212351,28398.4 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.453104,30687.2 -BatchNorm31_f2h,0,0 
-BatchNorm31_h2f,0,0 -Add40,0.112415,28402.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.134616,28402.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,40.2507,430708 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.241778,28569.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.206706,28569.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.143916,28554.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,42.0704,458056 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.292549,28539.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.307762,28543.2 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.224198,28543.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,19.3591,223505 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.208376,28452 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.441463,30775 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.122802,28459.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.131224,28459.6 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,40.2674,434921 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.252927,28551.2 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.2022,28535.8 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.143827,28535.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,42.7277,461822 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.305886,28505.4 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.258526,28505.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.231718,28490.2 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,19.3671,224635 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.21292,28463.4 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.440369,30843.8 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.116537,28463.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.137433,28463.4 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,40.3751,433192 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.243282,28559.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.206021,28544 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.142706,28536.2 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,42.1652,455575 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.301412,28528.8 -Add50_f2h,0,0 
-Add50_h2f,0,0 -BatchNorm39,0.255857,28536.4 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.219116,28471.4 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,19.3949,221535 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.220082,28410.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.448932,30821.8 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.132511,28410.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.149229,28414.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,40.4039,431447 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.251909,28559.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.202962,28555.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.145484,28540.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,42.3478,458456 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.295544,28536.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.272056,28517.4 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.232939,28506 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,19.4228,224641 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.209586,28460.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.442936,30901.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.112243,28460.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.132518,28467.8 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.5804,284918 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.251302,28475.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.163615,28475.4 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.138957,28460 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.0129,372945 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.288134,28479.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.244435,28475.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.156217,28425.8 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,22.5114,257579 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.260402,28341.8 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.565206,31970 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.341,372860 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.278604,28464.2 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.65702,32126.6 
-BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.168146,28464.2 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.147762,28456.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,33.0313,367602 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.236991,28514 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.16494,28514 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.136096,28487.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.0946,373873 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.281483,28521.8 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.242777,28506.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.160569,28491.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,22.38,257012 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.262552,28430.2 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.571095,32084.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.11632,28422.4 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.131929,28422.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,33.208,370059 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.234508,28545 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.156902,28545 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.138726,28472.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.2549,374759 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.287737,28472.2 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.242924,28472.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.173835,28468.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,23.2326,262480 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.259678,28422 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.576547,32069 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.120069,28391.4 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.133849,28376 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.577596,28391.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.18945,33290 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.179046,28406.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.66568,110034 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_262.txt deleted file mode 100644 index afd86204a851f474212cc8a01710567f273029fc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,130.974,1.14388e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.422128,25056.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.337956,25041.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.73444,55571.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.73878,32802.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,74.7484,714104 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223724,25414.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.294456,25410.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.169944,25429.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,99.5155,942655 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.313055,25632.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.361784,26169.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.182188,25659 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,47.4843,447264 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.287033,25689.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.934472,28579.8 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,59.9335,548541 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.314692,25938 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.982721,29015 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.150764,25945.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.180946,25933.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,135.028,1.30745e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.313912,26396 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.447402,27066.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.298123,26376.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,100.959,984439 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.3537,26570.2 
-Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.458276,27294.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.296287,26570.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,48.0152,459941 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.310622,26535.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.960801,30278 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.132275,26527.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.157625,26547 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,134.782,1.34262e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.308229,26962.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.396177,26970.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.28124,26963.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,100.934,1.00789e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.348401,27108.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.458417,27951.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.2902,27104.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,48.288,476754 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.315006,27116 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.901346,31483.2 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.134297,27120.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.160998,27106 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,79.7201,815316 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.261189,27267.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.212888,27256.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.154655,27256.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,59.7785,609881 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.302864,27382 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.263519,27374.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.271416,27359.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,28.0965,286179 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.26293,27305.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.494506,29170.2 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,46.4392,475503 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.278738,27541.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.525149,29464.4 -BatchNorm15_f2h,0,0 
-BatchNorm15_h2f,0,0 -Add19,0.132838,27545.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.157496,27572.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,69.0729,721180 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.284684,27732.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.259467,27724.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.217272,27728.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,61.6447,620784 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.307454,27800.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.264792,27777.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.275378,27781.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,27.893,287881 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.273906,27742.8 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.499799,29764.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.126259,27746.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.148768,27750.8 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,68.6433,724543 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.284261,27931 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.257458,27931 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.209068,27931 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,60.311,638600 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.310737,27983.4 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.264466,27995.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.288504,27964.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,27.7523,289975 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.265215,27929.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.49457,30031.2 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.12334,27933.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.147026,27933.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,68.6011,729542 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.28074,28133.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.263902,28133.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.21548,28122.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,60.1866,639645 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.311613,28151 -Add29_f2h,0,0 
-Add29_h2f,0,0 -BatchNorm23,0.263928,28155 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.283538,28174.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,27.8113,293495 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.266328,28112.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.489245,30268 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.122034,28116.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.149829,28120.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,38.5317,421434 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.217446,28191.2 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.147967,28191.2 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.129183,28175.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,42.097,451424 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.305055,28197.8 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.259896,28197.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.230398,28197.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,19.3353,222079 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.217471,28124.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.436817,30356 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,45.8446,486902 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.306315,28269.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.627261,30497 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.219947,28247 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.184434,28247 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,40.6316,435687 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.26211,28414.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.199538,28399.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.154303,28315.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,42.3889,455434 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.301105,28398.2 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.262699,28375.6 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.231858,28348.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,19.5206,220360 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.214131,28272.6 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.436779,30561.4 
-BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.113344,28280.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.131218,28280.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,40.2284,430595 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.241055,28413.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.206924,28413.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.145798,28394.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,42.3943,456062 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.318206,28409.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.265957,28394 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.235448,28375 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,19.4523,224902 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.211768,28303 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.421264,30622.2 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.107852,28303 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.130757,28303 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,40.3351,432197 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.255058,28447.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.208414,28436.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.145253,28440 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,42.2895,455228 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.294494,28463.4 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.260152,28467.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.232075,28406.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,19.5995,226839 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.214444,28314.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.442001,30664.8 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.127622,28318.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.151276,28322.4 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,40.4216,432729 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.251001,28475 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.199999,28475 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.140882,28440.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,42.4534,458259 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.306072,28418 
-Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.257426,28414 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.231122,28410.2 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,19.3812,223851 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.210143,28334.2 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.439582,30714.6 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.11015,28334.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.133849,28318.8 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,40.2117,426849 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.24325,28471.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.204114,28425.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.136326,28394.8 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,42.3894,456951 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.299633,28471.8 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.264127,28468 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.253753,28441 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,19.6324,225486 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.220683,28384.2 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.439492,30799.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.110009,28388 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.133113,28391.8 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,25.5856,285321 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.250406,28414 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.160038,28414 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.133542,28395.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,34.1475,373680 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.287411,28391 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.306469,28394.8 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.15637,28383.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,22.6132,257441 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.271735,28334.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.572727,31947.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,35.4462,373580 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.277727,28399 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.656117,32080.4 
-BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.168613,28414.2 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143622,28414.2 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,33.0397,369920 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.24115,28499 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.161759,28495.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.130751,28472.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,34.2683,374324 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.284798,28445.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.299531,28445.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.163064,28426.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,22.5977,257851 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.264248,28399.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.56959,32031 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.116518,28368.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.132921,28368.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,33.1562,369473 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.239724,28476.2 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.159667,28472.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.128787,28445.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,34.2024,373938 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.291109,28426.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.299448,28411.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.160889,28399.6 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,22.6335,257597 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.270763,28353.6 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.574755,32012 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.116223,28345.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.137164,28322.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.582128,28318.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.233,34445.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.130976,28345.6 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.97631,111206 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_263.txt deleted file mode 100644 index 49071a42cb0be1bbfc0c55e8c370b45addc02c69..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,149.5,1.34458e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.40076,26003.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.24693,25988.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.77476,64404.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.73732,33446.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,77.2371,765319 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.198579,26286.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.294085,26800.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.146694,26297.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,145.879,1.42331e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.343147,26578.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.364849,27180.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.207563,26601.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,51.4426,490073 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.324177,26643.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.978592,29743.4 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,62.5683,622236 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.298718,26829 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.979239,30056.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.166489,26840.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.160242,26833 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,141.113,1.42576e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.331185,27225.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.454456,27964.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.2894,27255.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,147.068,1.48146e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.379978,27486.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.474256,28279.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.304568,27501.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,51.8237,510629 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.324082,27512.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.976686,31587.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.165004,27535.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.168773,27527.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,140.914,1.45471e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.336215,27860.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.447492,28734 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.301944,27837.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,147.121,1.51427e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.375141,28005.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.474769,28920.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.30844,28016.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,51.9479,520379 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.327582,27978.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.988404,32647 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.164019,28001.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.161324,28005.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,81.8219,866675 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.28908,28127.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.242873,28105.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.153894,28113.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,86.0812,904262 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.324715,28245.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.312453,28260.8 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.275416,28268.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,31.47,332524 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.275685,28242 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.537712,30263.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,51.624,544118 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.280056,28479.8 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.64241,30513 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.169286,28456.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.160383,28460.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,73.1071,779919 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.287967,28678.2 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.296549,28670.8 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.269297,28659.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,86.6251,921348 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.329214,28742.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.317463,28742.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.278674,28754.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,31.3964,339929 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.277074,28731.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.529405,30898.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.12142,28727.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.147974,28731.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,73.1379,786932 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.288555,28959.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.290929,28944.4 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.263877,28921.6 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,87.0549,938136 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.332466,29074 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.268152,29016.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.288913,29013 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,31.2147,342825 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.268626,28940.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.486468,31191 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.12398,28952 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.150303,28955.8 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,73.0711,793528 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.283372,29146.4 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.261912,29146.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.258021,29142.6 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,86.7318,942908 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-Add29,0.325195,29192.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.266936,29196.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.283819,29204 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,31.0783,343405 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.276383,29139.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.495326,31458.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.124857,29127.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.148978,29139 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,40.1748,440482 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.252446,29196 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.183308,29196 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.137574,29177 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,56.259,614735 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.311775,29181 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.269336,29184.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.267,29188.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,22.2687,257834 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.234181,29142.8 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.471145,31515.6 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,50.5911,557605 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.317796,27888.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.583344,30249.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.201195,29303.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.173586,29276.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,43.7502,492967 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.262661,29430 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.214886,29414.6 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.148608,29395.6 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,56.8286,623161 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.310085,29457 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.268651,29411.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.267301,29395.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,22.2759,263969 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.224165,29327 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.474507,31749.6 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.126534,29304 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.138168,29304 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,43.2961,488922 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.271308,29441.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.213631,29437.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.147737,29403.4 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,56.4881,619043 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.31285,29476.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.263096,29461.2 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.256626,29445.8 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,22.2316,261121 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.220473,29369.2 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.451178,31818.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.115801,29357.8 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.133772,29357.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,43.6382,494029 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.288281,29491 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.214949,29495 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.147142,29487.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,56.7421,626686 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.314667,29437.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.266501,29445.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.258879,29426 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,22.2647,262846 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.226443,29384 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.445085,31863.2 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.114329,29357.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.132531,29361 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,43.6635,495499 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.273048,29464.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.207397,29464.2 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.155647,29449 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,56.4914,623114 -Conv39_f2h,0,0 -Conv39_h2f,0,0 
-Add50,0.305144,29476.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.273835,29483.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.258853,29437.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,22.3705,264427 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.220735,29422.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.444414,31932.4 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.119622,29407.2 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.134738,29411.2 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,43.5052,488218 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.265599,29476.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.213849,29480.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.153183,29472.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,56.6067,626227 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.31043,29482.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.261662,29475.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.255742,29475 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,22.3768,262976 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.225106,29424.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.439358,31969 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.11017,29413.4 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.13687,29413.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,26.8292,297098 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.260844,29460.8 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.197938,29457 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.138476,29433.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.6757,482681 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.296977,29452.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.274373,29448.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.193957,29448.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.4725,296284 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.279531,29371.4 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.624809,33200.8 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,39.504,445501 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.305003,29551.4 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.638378,33380.8 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.142706,29547.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.155276,29528 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,35.9164,413756 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.283415,29613.2 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.199179,29609.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.13806,29570.8 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.6754,487696 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.299723,29596.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.267812,29600.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.197784,29566 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3593,297445 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.276024,29485 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.614115,33318.4 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.125299,29485 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.141888,29485 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,35.9393,414572 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.278231,29573.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.197785,29565.4 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.138502,29500.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.6676,482678 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.293476,29592 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.270846,29592 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.204011,29530.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.5382,298627 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.283295,29458 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.622243,33276.2 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.135679,29442.8 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.134899,29442.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.616099,29439 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.27162,35828 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.129593,29442.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.76313,113945 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_264.txt deleted file mode 100644 index ed370c72a35a37cf5558de4eb8e00f30a5d396d0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,148.994,1.32122e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.395691,26011.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.32693,26026.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.7306,59843.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.84757,34416.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,77.9006,775224 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.225375,26349.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.302373,26874.4 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.15287,26368.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,145.387,1.40239e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.343045,26638.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.357355,27232.8 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.214271,26657.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,52.2534,510934 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.324337,26680.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.93906,29819 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,62.7484,618697 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.296255,26932.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.962548,30235.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.184032,26932 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.174713,26916.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,141.241,1.41926e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.338661,27377.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.454379,28094 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.309047,27362.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,147.711,1.48472e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.378155,27559 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.47848,28321.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.350666,27535.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,52.4503,506217 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.33299,27504.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.944763,31548.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.160838,27477.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.16903,27489 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,141.727,1.45782e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.334065,27913.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.458654,28802.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.298713,27917.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,147.67,1.5201e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.383998,28128 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.465149,29024 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.305655,28097 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,52.2856,523803 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.324471,28055 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.974343,30865.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.176921,28032 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.168172,28016.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,81.8246,869741 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.285867,28171.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.205734,28171.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.15872,26745.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,86.2508,906218 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.336004,28356.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.311959,28356.6 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.294302,28356.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,31.6299,334714 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.277438,28337.4 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.49089,30351.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,51.4024,542566 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.274891,28564.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.58534,30578.2 
-BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.174252,28510.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.170354,28514.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,73.1819,781119 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.281784,28790.6 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.261529,28760.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.278866,28784 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,86.5602,928592 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.33219,28898.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.312721,28876 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.302034,28872 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,31.6048,339298 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.276332,28807.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.507127,30989.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.12759,28826.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.161535,28819 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,73.447,793508 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.299147,29043.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.265803,28982.8 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.2763,28986.8 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,86.6137,937295 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.328471,29074 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.303256,29062.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.29006,29074 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,31.5062,341941 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.280708,29005.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.497968,31283 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.131948,29017 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.156505,29024.6 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.909,795243 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.284844,29249 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.261714,29241.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.262513,29233.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,86.6832,942072 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.320683,29265.2 -Add29_f2h,0,0 
-Add29_h2f,0,0 -BatchNorm23,0.3011,29272.8 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.288933,29280.4 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,31.3662,347207 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.27564,29203.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.492195,31523.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.123621,29180.8 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.161791,29180.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,40.2632,444370 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.235896,29291 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.183474,29264.4 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.142968,29249.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,56.4478,620816 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.309157,29341.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.269457,29303 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.26563,29291.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,22.1958,260134 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.22718,29265 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.451729,30451.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,50.4672,560744 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.308011,29391.4 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.599286,31787.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.234744,29387.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.187577,29375.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,43.355,490122 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.26444,29525.2 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.221817,29525.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.158886,29525.2 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,56.7003,627833 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.311109,29537 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.338418,29540.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.261137,29514 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,22.3318,258914 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.233419,29476 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.447773,31948.4 
-BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.120096,29479.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.13735,29491.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,43.5133,489889 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.264575,29571 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.226654,29555.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.160472,29555.8 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,56.607,626551 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.314565,29552 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.276517,29544.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.276043,29548.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,22.3335,263821 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.223103,29502.6 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.444132,31958.8 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.115487,29487.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.138361,29487.2 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,43.4609,491459 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.26218,29575 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.222424,29575 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.159134,29575 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,56.3371,626651 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.318962,29579.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.296062,29567.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.266821,29529.6 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,22.3506,262243 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.223583,29449.2 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.432414,31951.2 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.116966,29433.8 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.142668,29437.6 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,43.3827,489727 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.264158,29602.4 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.218584,29591 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.1622,29575.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,56.4339,622509 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.308638,29606.4 
-Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.268472,29610.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.270987,29579.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,22.3047,263895 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.220582,29518.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.440913,32070.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.116639,29510.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.140204,29495.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,43.3462,490552 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.261016,29625.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.2171,29610.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.156274,29598.8 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,56.4875,627562 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.317047,29591 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.31402,29587.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.26974,29575.6 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,22.2393,264178 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.223096,29537 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.439633,32115.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.112326,29525.6 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.138578,29533.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,26.5744,298423 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.25212,29579.6 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.18508,29579.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.146546,29541.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.4373,481503 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.307896,29595 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.352932,29579.8 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.216543,29564.4 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.8001,301597 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.289804,29503 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.617744,33362.6 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,39.2018,446936 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.308101,29610 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.628739,33504.4 
-BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.137862,29610 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.146188,29610 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,35.9158,414493 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.281253,29644.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.197976,29644.6 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.14958,29629.2 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.4436,480200 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.287646,29667.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.274674,29652 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.219749,29632.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.2219,298626 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.277842,29590.2 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.608412,33480.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.126661,29590.2 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.144082,29574.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,35.8793,415777 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.293189,29640 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.205157,29640 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.158162,29616.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.7266,480090 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.306955,29650.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.273541,29635.2 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.224754,29616.2 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.3017,298301 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.282514,29577.8 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.61521,33452.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.128326,29562.4 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.142233,29558.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.645008,29558.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.2358,34745.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.184837,29562.4 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.68554,114383 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_265.txt deleted file mode 100644 index 50304795f436db23f1b47f41b544d664fa71eac9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,150.552,1.35888e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.382418,25938.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.257689,25958 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.86179,62845.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.84681,34340.4 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,77.6417,765948 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.207615,26247.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.325778,26757.6 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.159058,26236 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,145.852,1.42986e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.356312,26569.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.358014,27121.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.194943,26538.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,52.3246,501011 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.321899,26609.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.972392,29725 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,62.9844,626051 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.300069,26810 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.983777,30124.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.17022,26810.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.162905,26802.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,141.377,1.42327e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.329854,27236.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.463089,27972.6 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.289489,27259.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,147.681,1.48593e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.372913,27490 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.475754,28271.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.306226,27493.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,52.6049,521657 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.329522,27451 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.981095,31530.2 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.162117,27477.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.162764,27470 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,141.39,1.46028e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.327148,27868.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.451159,28726.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.289183,27841.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,147.292,1.51208e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.471459,28066.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.465469,28989.4 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.301272,28055.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,52.168,528583 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.327805,28055 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.981364,32803 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.155705,28062.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.160735,28070.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,81.8565,870716 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.287596,28186.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.239371,28171.2 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.157522,28175 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,86.1486,908432 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.323148,28253.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.301502,28257 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.287108,28272.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,31.5869,338316 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.283557,28238 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.540272,30260 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,51.5673,545026 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.28437,28518.2 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.616055,30628.2 
-BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.169004,28541 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.154969,28544.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,73.1859,779608 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.27756,28679.4 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.29299,28653 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.26851,28657 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,86.4539,926669 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.320779,28830.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.303576,28834.4 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.283192,28846.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,31.4412,343147 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.274776,28735.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.524304,30917.4 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.120479,28746.8 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.150585,28758.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,73.1047,784667 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.281611,28944.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.300356,28944.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.273567,28948.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,86.5005,934084 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.320491,29005.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.309111,28975.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.274513,28982.8 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,31.8439,335448 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.320767,28925.8 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.520132,31214.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.131027,28937.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.162879,28941 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,72.7452,785752 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.280894,29157.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.299032,29165.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.270738,27685.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,87.9394,946851 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.328971,29173.2 
-Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.311525,29169.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.284369,29177 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,31.4698,347643 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.27571,29127.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.538602,31485.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.128217,29154.2 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.155928,29161.8 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,40.3491,443430 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.243557,29207.4 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.185292,29215 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.138815,29199.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,56.8501,621376 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.312875,29211.2 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.27173,29211.2 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.260895,29203.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,22.2888,259253 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.226175,29158 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.446026,31538.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,50.6775,560423 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.307133,29261 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.595459,31679.8 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.226328,29253.4 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.180761,29253.4 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,43.6579,490840 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.26915,29402.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.216011,29391.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.156952,29326 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,56.8946,626642 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.305817,29410.8 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.262078,29410.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.270603,27884.6 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,22.2916,259351 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.232062,29330.4 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.452125,31787.4 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.122297,29315 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.133773,29318.4 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,43.5641,496343 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.266226,29502.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.20839,29490.8 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.144172,29444.8 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,56.909,629275 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.313323,29479.6 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.267307,29483.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.258059,29441.2 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,22.3601,261086 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.229388,29346 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.444542,31802.2 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.114572,29315.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.136979,29315 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,43.4579,489278 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.281528,29472 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.21589,29464.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.158578,29445.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,57.0732,630778 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.304421,29460.8 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.263667,29453.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.25927,29453.2 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,22.4552,261117 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.230489,29349.6 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.451255,31821 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.118591,29334.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.136082,29326.4 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,43.5808,495239 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.271641,29468.4 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.212319,29468.4 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.151634,29460.6 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,57.1736,634202 -Conv39_f2h,0,0 -Conv39_h2f,0,0 
-Add50,0.323461,29438.2 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.324248,29445.8 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.265074,29430.4 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,22.5101,264327 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.230584,29407 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.448759,31954.8 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.115992,29395.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.132357,29395.6 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,43.5573,494485 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.260505,29453.2 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.214732,29453.2 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.151001,29453.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,56.8903,625869 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.312209,29432.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.275122,29429.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.264888,29414 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,22.5955,264234 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.23799,29374.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.44554,31941.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.116902,29371.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.139641,29371.2 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,26.776,298332 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.259723,29422.2 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.193643,29422.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.139845,29406.8 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.4762,479453 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.296286,29445 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.280638,29429.6 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.202527,29410.2 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,26.5241,297514 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.280017,29340.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.631376,33193 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,39.2177,444915 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.295845,29463.6 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.633552,33293 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.129983,29448.2 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143276,29448.2 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,35.9008,414000 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.294046,29494.4 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.196543,29490.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.137887,29475 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.5729,485423 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.288203,29555.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.26972,29524.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.195091,29497.6 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,26.3235,296875 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.275641,29431.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.618121,33276.4 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.123264,29431.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.134566,29427.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.1377,415367 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.279871,29516.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.201458,29481.8 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.139244,29470.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.5938,481908 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.295012,29504 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.271332,29488.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.215391,29488.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,26.5262,298122 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.273356,29404.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.620394,33249.2 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.127449,29400.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.132012,29400.6 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.614307,29400.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.25512,35808.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.129037,29400.6 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.76575,113784 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_266.txt deleted file mode 100644 index 2fec11a0373a923cedd956091368a97e10916b03..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,157.049,1.42437e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.408983,26264 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.282687,26279.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.91682,60773.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.68663,34270.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,78.2223,786958 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.210668,26541.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.291756,27063 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.150566,26553 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,126.993,1.24914e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.343666,26756.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.357938,27350.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.195104,26783.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,52.8028,515675 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.330744,25608.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.931586,28666.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,63.1143,630945 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.301374,26999 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.933979,30261.2 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.18012,27034.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.166655,27038 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,143.405,1.45185e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.329886,27453 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.455786,28181 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.292895,27460.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,129.128,1.30214e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.380113,27567 
-Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.461597,28344.8 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.303186,27571.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,53.3772,531164 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.338232,27567.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.93675,31602.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.150649,27571.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.178693,27571.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,144.234,1.48584e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.33045,27966.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.443537,28832 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.292471,27943.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,129.334,1.32744e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.377597,28114.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.453579,28992.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.290258,28092.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.0468,537122 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.358981,28058 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.960603,32702.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.16085,28058 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.166931,28050.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.759,871886 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.285643,28191.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.249669,28191.4 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.185452,28199 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,76.9195,815515 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.334098,28355.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.295704,28374.4 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.280734,28374.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,32.5495,343613 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.291799,28332.6 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.481424,30320 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,53.1383,571387 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.285579,28618.8 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.579932,30637 -BatchNorm15_f2h,0,0 
-BatchNorm15_h2f,0,0 -Add19,0.155531,28561.4 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.161464,28569 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,74.8493,805997 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.287793,28825.8 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.258124,28798.8 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.270962,28810.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,77.4678,838618 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.329745,28868.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.30142,28879.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.274936,28883.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,32.5414,349991 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.282558,28837.6 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.498608,30962 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.123052,28833.6 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.151033,28822 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,74.917,814945 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.296345,29035.6 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.264639,29039.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.273247,29047.2 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,77.696,847097 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.338833,29107.8 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.305822,29115.6 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.298084,27689.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,32.4144,351142 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.291525,29032 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.486352,31305.6 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.121906,29043.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.158892,29047.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,74.6395,816671 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.292882,29256.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.258073,29256.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.27644,29264.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,77.7032,850474 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.335512,29287 -Add29_f2h,0,0 
-Add29_h2f,0,0 -BatchNorm23,0.290411,29298.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.276901,29287 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,32.2584,353586 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.289056,29226.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.491646,31568.6 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.124115,29241.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.156268,29245.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.0142,464333 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.240817,29314 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.156594,29317.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.142086,29294.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.1458,589389 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.314181,29310 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.304126,29302.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.256216,29306.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,22.8487,264330 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.215628,29241.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.435908,31614.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.4881,589318 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.304498,29462 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.584496,31858 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.203153,29443 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.166846,29443 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,44.7881,501309 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.266552,29549.8 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.236479,29542 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.153299,29534.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.2674,595662 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.310142,29580.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.27061,29549.6 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.265196,29519 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,22.9301,264936 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.222962,29469.4 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.443895,31896.2 
-BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.119084,29488.4 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.134879,29496.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,44.7429,502162 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.268177,29626.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.240844,29611 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.164883,29611 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.1094,592137 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.309982,29640.8 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.29708,29640.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.26332,29625.4 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,22.8085,265792 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.229713,29536.2 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.426531,32031 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.111513,29544.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.143206,29513.4 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,44.6512,501649 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.270193,29736.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.229976,29740.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.160383,29706 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.1013,593487 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.322487,29624.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.306865,29639.6 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.25237,29632 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,23.0185,265912 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.228312,29616.6 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.440854,32141.8 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.112773,29620.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.141586,29605 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,45.1498,505203 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.265086,29666.6 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.232164,29666.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.164978,29670.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.4391,599384 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.309751,29640 
-Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.271525,29636.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.254674,29628.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,22.9036,267502 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.219288,29590.4 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.439069,32134.8 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.117388,29582.6 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.135366,29586.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,44.6942,503734 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.269465,29690 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.228344,29674.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.1635,29655.4 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.4426,599226 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.314264,29674.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.309079,29659.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.260229,29651.6 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,23.0238,267446 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.230955,29602 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.443441,32180.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.116243,29605.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.141785,29605.8 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.1801,306636 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.255339,29632.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.202194,29632.4 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.142521,29632.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,43.0601,480255 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.301515,29697.6 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.281208,29705.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.224875,29670.8 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,27.269,305710 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.267052,29602 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.625072,33466.2 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,41.0977,460679 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.285746,29743.4 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.647145,32329.8 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.159551,29743.4 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.147174,29724.4 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,37.0261,419154 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.269951,29800.4 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.214911,29800.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.141933,29781.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.0102,481247 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.294341,29819.6 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.266744,29819.6 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.224812,29789.2 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,27.233,305554 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.275448,29694 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.646633,33607.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.132243,29694 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.140032,29694 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.8916,417611 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.272523,29812 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.219864,29812 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.14917,29789.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,42.9744,482386 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.30058,29796.8 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.268837,29785.4 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.216754,29766.4 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,27.1978,302739 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.26556,29716.8 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.642179,33630.6 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.14204,29701.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.134662,29694 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.62719,29694 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.222,36217 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.129823,29697.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.67455,114885 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_267.txt deleted file mode 100644 index 93685543e165975790b21ed3b63feb5f731b76e3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,159.671,1.45097e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.414032,26199 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.277419,26206.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.93183,63592.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.73804,34308.8 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,77.5136,776185 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.204089,26517.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.285169,26521 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.155007,26517.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,127.27,1.25111e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.348664,26680.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.351409,27259.2 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.19509,26680.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,53.0549,514303 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.322122,26730.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.960058,29834.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,62.9681,628946 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.290238,26930.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.951002,30196.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.185381,26958 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.165177,26958 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,143.662,1.44917e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.335621,27403.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.46296,28143.2 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.285438,27388.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,130.149,1.30797e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.378846,27554.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.461866,28340.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.294385,27532.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,53.593,528992 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.338545,27582.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.943111,31579.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.179487,27529.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.169733,27529.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,144.141,1.48523e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.329348,27935.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.459088,28816.4 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.288075,27912.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,129.962,1.33662e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.38154,28042.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.467312,28969 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.304459,28050.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.3228,541727 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.333674,28031 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.951296,32722 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.166136,28019.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.161759,28027.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.4119,864320 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.269355,28141.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.251513,28129.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.182246,28133.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,77.2159,815017 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.337451,28298.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.304139,28264.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.280632,28279.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,32.5272,342084 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.304466,28229.8 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.511754,30240 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,53.1151,569672 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.291012,28527.2 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.590615,30587.4 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.163993,28519.8 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.168287,28523.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,75.0629,805863 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.290321,28726 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.261106,28733.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.276549,28741.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,77.6741,836652 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.337425,28783.4 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.30309,28768 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.289862,28775.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,32.469,344041 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.291384,28764 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.504125,30919 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.124108,28756.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.15541,28756.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,74.762,808017 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.293624,28967.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.258335,28952 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.285252,28940.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,77.604,841181 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.33068,29016.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.298783,29020.4 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.283442,29020.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,32.2171,347243 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.296856,28963.4 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.511684,31240.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.135359,28982.4 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.166476,28986.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.0394,815546 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.29859,29142.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.26028,29112.2 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.279192,29119.8 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,77.7645,849580 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-Add29,0.335723,29199.6 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.298911,29188.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.281572,29192 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,32.2133,351783 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.291435,29142.6 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.508861,31466 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.124991,29135 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.152646,29146.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,40.9805,463475 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.232594,29211.4 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.152019,29184.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.135743,29139 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.2529,588193 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.306066,29200 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.3254,29200 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.257592,29162 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,22.9867,263282 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.225362,29146.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.455664,31500.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.8654,586985 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.307268,29313.8 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.594301,31713.6 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.210764,29310 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.17221,29298.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,45.3646,502108 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.277368,29447.2 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.25107,29447.2 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.166514,29443.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.3835,591846 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.312357,29420.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.314724,29405.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.255122,29409 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,23.0297,265575 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.223673,29366.2 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.444933,31842.6 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.119622,29393.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.134604,29397.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,44.8916,501681 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.25843,29496.6 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.2355,29477.4 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.168998,29481.2 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.374,591221 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.31219,29475.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.262372,29448.8 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.264952,29437.4 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,23.1118,265844 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.224805,29410 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.435806,31867.2 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.108589,29410 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.13769,29410 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,44.9201,502243 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.26563,29551.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.248153,29555.6 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.178182,29528.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.8614,599626 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.310801,29562.6 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.28076,29501.4 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.257471,29486 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,23.0662,264503 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.233144,29436.4 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.438775,31938.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.120959,29421 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.147602,29424.8 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,45.0612,504327 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.275282,29585.4 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.243153,29593.2 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.172024,29573.8 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.3727,592635 -Conv39_f2h,0,0 -Conv39_h2f,0,0 
-Add50,0.308933,29582 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.262495,29582 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.268223,29566.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,23.3319,268437 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.239307,29536 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.437143,32088.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.119718,29547.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.136569,29532 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,44.8987,502620 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.263097,29624.4 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.242533,29593.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.168633,29593.6 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.3799,595506 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.306674,29601.6 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.259871,29609.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.259,29597.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,23.0855,265645 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.230194,29582.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.432292,32161.4 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.109715,29586.4 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.137657,29571 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.1782,306141 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.245106,29632 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.186232,29628.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.142226,29597.6 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,42.9186,476528 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.299729,29643.6 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.271442,29643.6 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.229536,29597.6 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,27.2244,305476 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.273778,29563.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.64797,33435.2 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,41.1902,458662 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.286846,29689.6 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.642614,33542.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.163525,29670.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143084,29666.8 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,36.9691,417182 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.271416,29770 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.213944,29762.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.147404,29739.6 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.1152,483409 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.294827,29808.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.314462,29762.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.224299,29743.4 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,27.2069,305348 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.267768,29743.6 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.639382,33630.6 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.140268,29713 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.14254,29693.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.8511,417525 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.26949,29800.6 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.212146,29770.2 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.141663,29755 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.1946,480936 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.300548,29796.8 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.267365,29796.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.232684,29796.8 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,27.2799,307255 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.267851,29736 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.631439,33649.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.142719,29732.2 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.136005,29728.4 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.606287,29728.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.21748,34946.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.195122,29728.4 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.82772,116495 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_268.txt deleted file mode 100644 index 15ea4be9c331557db770acf1fa4e5439f4e7357c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,157.979,1.44264e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.415761,26271.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.347633,26290 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.75702,58178 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.7396,34349.2 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,78.2725,786440 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.187634,26517 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.305835,27039.2 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.148415,26541 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,126.218,1.25053e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.342577,26794.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.371102,27384.4 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.206737,26820.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,52.577,492786 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.325796,26844 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.01519,29879.6 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,63.0262,627772 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.293855,27044.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.00186,30287.6 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.197906,27037.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.168773,27041.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,143.678,1.45529e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.331372,27506 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.450589,28211 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.298071,27475.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,129.288,1.30893e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add8,0.376555,27609.6 
-Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.472227,28387.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.309636,27590.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,53.0654,524344 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.33491,27610 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,0.95881,31636.8 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.17667,27633 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.187128,27640.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,144.088,1.49667e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.33226,28039 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.45448,28916.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.294891,28058 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,129.467,1.33103e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.37537,28122.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.465347,29030 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.309656,28107.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.2242,538073 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.334244,28115 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.963821,32736.6 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.165689,28107.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.170674,28111.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.4395,871762 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.273049,28294.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.255794,28282.8 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.191263,28290.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,76.4929,809843 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.329592,28416 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.31068,28416 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.285259,28400.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,32.3583,345417 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.28775,28397 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.53585,30361.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,53.1277,572101 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.289964,28649.6 -Add18_f2h,0,0 -Add18_h2f,0,0 -BatchNorm15,0.631568,30663.8 -BatchNorm15_f2h,0,0 
-BatchNorm15_h2f,0,0 -Add19,0.153215,28642 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.171558,28657.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,74.9948,806547 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.293426,28860 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.29427,28875.2 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.275314,28863.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,77.3328,831845 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.326328,28966.8 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.293848,28925 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.281982,28898.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,32.3874,350317 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.381092,28887 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.544599,31049.6 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.126668,28898.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.152953,28898.4 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,74.9127,814790 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.297016,29062.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.301873,29073.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.274002,29058.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,77.4105,841822 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.336043,29130.6 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.304018,29138.2 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.2865,29138.2 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,32.126,350003 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.290866,29123 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.543569,31384.8 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.125439,29138.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.152377,29138.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,74.9284,821387 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.291358,29309.8 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.294385,29309.6 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.273348,29302.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,77.8189,853164 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.339505,29381.8 -Add29_f2h,0,0 
-Add29_h2f,0,0 -BatchNorm23,0.299902,29355.2 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.281707,29343.8 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,32.2492,354893 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.29162,29286.8 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.542928,31606.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.123826,29283 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.158681,29260.2 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,40.9547,462549 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.227443,29351.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.155289,29336.6 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.138879,29317.4 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.0785,586709 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.314917,29378.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.297381,29348 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.256945,29351.8 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,23.0526,264413 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.230917,29302.4 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.486327,31652.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.7449,591079 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.303096,29469.6 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.630525,31850.4 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.212543,29458.2 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.16711,29465.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,45.0617,503990 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.268997,29557.6 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.258495,29538.4 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.161042,29523 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.5478,595695 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.313111,29526.6 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.296901,29515.2 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.261228,29500 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,22.9725,266458 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.222629,29465.2 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.476951,31895.2 
-BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.115852,29469.2 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.136691,29469.2 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,45.0946,504612 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.263403,29626.4 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.238034,29611.2 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.15543,29596 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.411,596209 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.309496,29609.6 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.295479,29563.4 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.260568,29563.4 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,23.2305,268111 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.223615,29513 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.480977,31969.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.108256,29497.6 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.135097,29497.6 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,44.8984,502389 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.270699,28171.6 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.233798,28156.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.160274,28140.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.5999,591389 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.31779,29643 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.298859,29635.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.258545,29600.8 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,22.9657,267075 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.222539,29539.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.493456,32057.4 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.130853,29543.6 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.146994,29555 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,45.0524,506095 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.264236,29685.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.237542,29677.6 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.16341,29670 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.3686,594822 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.322207,29663 
-Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.298399,29659 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.267973,29643.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,23.1358,267504 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.223647,29590.2 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.445841,32146.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.119321,29582.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.153158,29582.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,45.066,506608 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.268908,29750.8 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.238743,29735.6 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.15765,29705 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.3066,595357 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.30606,29724.4 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.268152,29709.2 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.261214,29697.8 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,23.0216,268090 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.216491,29667.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.446301,32219.2 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.110777,29644.2 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.134303,29644.2 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.0053,301122 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.241759,29728.2 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.199448,29728.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.136262,29678.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,42.8956,480949 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.295755,29728.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.27715,29728.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.213292,29712.8 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,27.1434,302019 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.260773,29674.8 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.645756,33527.8 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,41.1084,452568 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.288068,29812 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.663823,33680.2 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.185196,29793 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.143673,29793 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,36.7699,418828 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.271647,29865.2 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.213202,29865.2 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.143001,29834.8 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.2289,486662 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.297765,29884.2 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.263953,29884.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.221432,29880.4 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,27.1151,303465 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.266584,29823.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.620457,32402.2 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.134022,29808.2 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.140991,29823.4 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,37.1188,421913 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.273137,29884.2 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.223877,29876.6 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.14247,29846.2 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,42.8424,479392 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.293854,29884.2 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.26901,29872.8 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.230361,29857.6 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,27.109,305105 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.268044,29808.2 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.624092,32406 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.136325,29793 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.13687,29777.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.615158,29793 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.33968,36316 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.192172,29796.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.92475,116746 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_269.txt deleted file mode 100644 index ffe9c7f43dbf4070056d9c9b3dc7defa01c4ba82..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,160.729,1.45536e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.403914,26163 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.293496,26109.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,5.9709,67251.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,2.73814,34096.6 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,77.2551,773915 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.196837,26441 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.301368,26962.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.146015,26452.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,127.234,1.24772e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.342405,26639.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.387269,27225.6 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.212562,26662 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,52.6512,509299 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.323845,26704 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,0.994126,29766.2 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,63.0586,629511 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.30188,26892.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,0.971502,30135.4 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.180805,26900.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.163724,26900.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,143.476,1.44485e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.327626,27376.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.446218,28089.4 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.290584,27365.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,129.567,1.3028e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.378763,27505.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.472797,28294.4 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.293528,27501.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,53.5011,525808 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.335556,27487.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.00013,31518.4 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.165355,27507 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.162123,27507 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,144.947,1.49617e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.339415,27893.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.450275,28770.6 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.282021,27885.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,129.562,1.32464e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.371107,27965.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.471083,28881.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.31509,27977.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,53.1016,536493 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.337483,27939.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,0.979847,32568.4 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.157055,27946.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.159999,27950.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,82.9835,867403 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.275825,28141.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.315225,28095.6 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.182643,28103.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,76.7925,812172 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.34746,28306 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.3102,28283.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.276101,28290.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,32.5864,342571 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.292772,28244.8 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.505623,30228.4 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,53.0444,569632 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.290751,28496 -Add18_f2h,0,0 -Add18_h2f,0,0 
-BatchNorm15,0.608208,30517.8 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.155372,28488.2 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.164038,28488.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,74.8307,802514 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.28636,28749 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.297414,28741.4 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.268517,28725.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,77.597,837031 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.33971,28795.2 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.293253,28783.8 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.281912,28791.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,32.5679,350264 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.29166,28776 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.55274,30923.2 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.134687,28772 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.151436,28787.2 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,74.9734,811515 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.292447,29001.2 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.294846,28955.6 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.271256,28967 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,77.8673,843698 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.333534,29062.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.304983,29050.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.277649,29020.4 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,32.5221,352657 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.29802,28997.6 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.553066,31240.4 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.125944,28978.6 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.146981,28963.4 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,75.1733,818688 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.293906,29172.6 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.287685,29176.4 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.272254,29180.2 -Relu20_f2h,0,0 -Relu20_h2f,0,0 -Conv23,78.116,848689 -Conv23_f2h,0,0 -Conv23_h2f,0,0 
-Add29,0.335249,29207 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.309176,29214.6 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.281451,29214.6 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,32.5588,355049 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.296836,29142.4 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.54504,31477.2 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.129221,29157.6 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.161413,29161.6 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,41.3082,464069 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.24133,29237.8 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.157612,29237.8 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.134022,29207.2 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,53.4512,586710 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.31349,29207.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.28963,29207.4 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.252632,29211.2 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,23.1345,263623 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.230143,29203.6 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.488023,31546.4 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,52.8845,588491 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.28999,29291 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.624598,31683.2 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.207493,29291 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.172324,29260.6 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,44.9031,498448 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.265106,29477.4 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.26782,29466 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.166316,29458.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,53.4766,590058 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.303569,29477 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.296952,29477 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.261432,29480.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,23.1321,265588 -Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.226117,29416 -Add39_f2h,0,0 -Add39_h2f,0,0 
-BatchNorm31,0.479498,31849.6 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.115353,29400.8 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.13456,29385.6 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,44.9036,501895 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.264075,29522.8 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.261374,29526.6 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.154642,29526.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,53.4845,597344 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.311979,29502 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.291371,29506 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.261817,29502 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,23.1998,267699 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.228498,29466.8 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.483664,31946.4 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.106975,29466.8 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.13262,29466.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,44.8166,502763 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.257995,29532.2 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.238661,29544.2 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.157516,29532.2 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,53.5401,592689 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.307743,29539.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.292395,29516.2 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.256261,29508.4 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,23.04,266388 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.226175,29481.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.481668,31995.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.110341,29470.4 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.135212,29474.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,45.0617,501693 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.275665,29643 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.292344,29643 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 -Relu35,0.157817,29593.2 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,53.538,594523 -Conv39_f2h,0,0 -Conv39_h2f,0,0 
-Add50,0.307729,29586 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.291947,29570.6 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.25909,29543.6 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,23.189,266549 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.226994,29501.6 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.489975,32046.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.117747,29505.4 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.147903,29505.4 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,45.0959,503532 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.268753,29662.8 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.238936,29662.8 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.159443,29628.2 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,53.9496,597935 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.317989,29624.8 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.270002,29620.8 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.258123,29559.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,23.5419,269971 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.225202,29509.6 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.434609,32080.8 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.106355,29513.4 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.134822,29521 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,27.0386,305748 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.238398,29597.4 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.17998,29578.2 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.133651,29547.4 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,42.9802,477477 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.295012,29605.2 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.268101,29605.2 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.223218,29563 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,27.4824,306594 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.277599,29528.6 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.631325,33431 -BatchNorm46_f2h,0,0 -BatchNorm46_h2f,0,0 -Conv47,41.2329,465223 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.329508,29655.6 -Add60_f2h,0,0 -Add60_h2f,0,0 
-BatchNorm47,0.656246,33565.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.16467,29655.6 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.149323,29655.6 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,37.076,418406 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.26597,29732 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.224844,29716.8 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.139506,29716.8 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,43.43,486118 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.295089,29747.4 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.279307,29751.2 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.23589,29735.8 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,27.2939,306835 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.270392,29705.4 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.636041,33607.8 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.139308,29701.6 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.139647,29701.6 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,36.897,417413 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.276343,29747.4 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.213509,29732.2 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.138284,29717 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,43.1599,481978 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.291519,29755 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.276235,29762.6 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.222891,29755 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,27.6381,310049 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.274859,29709.4 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.630793,33611.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.136069,29686.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.13566,29682.8 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.623741,29682.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.25289,36186.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.135909,29682.8 -Add70_f2h,0,0 -Add70_h2f,0,0 -Softmax1,9.72458,114871 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp32_perf_fp32_120.txt deleted file mode 100644 index 8d9d8fee56451e2961b8d7b53aed70aef75d0acb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/resnet50_imagenet/resnet50_imagenet_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,687 +0,0 @@ -Conv1,352.727,2.71583e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.49045,21979.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.298591,21994.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Pool1,14.1205,117030 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -BatchNorm1,3.20576,31212 -BatchNorm1_f2h,0,0 -BatchNorm1_h2f,0,0 -Conv2,96.997,770268 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.399084,22871.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -BatchNorm2,0.640881,23419.8 -BatchNorm2_f2h,0,0 -BatchNorm2_h2f,0,0 -Relu2,0.363673,22882.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Conv3,269.812,2.26471e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.433215,23528.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -BatchNorm3,0.618777,24230 -BatchNorm3_f2h,0,0 -BatchNorm3_h2f,0,0 -Relu3,0.424281,23547.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,144.543,1.18734e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.403065,24240 -Add4_f2h,0,0 -Add4_h2f,0,0 -BatchNorm4,1.53223,30464 -BatchNorm4_f2h,0,0 -BatchNorm4_h2f,0,0 -Conv5,166.193,1.48125e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.356293,23675.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -BatchNorm5,1.51066,30515 -BatchNorm5_f2h,0,0 -BatchNorm5_h2f,0,0 -Add6,0.26439,23664.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu4,0.309632,23672.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Conv6,217.966,2.03416e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add7,0.406316,25935.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -BatchNorm6,0.614763,26893.2 -BatchNorm6_f2h,0,0 -BatchNorm6_h2f,0,0 -Relu5,0.349894,25912.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv7,268.095,2.47292e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 
-Add8,0.43847,26289.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -BatchNorm7,0.62083,27255.2 -BatchNorm7_f2h,0,0 -BatchNorm7_h2f,0,0 -Relu6,0.353343,26301.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv8,173.448,1.58116e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add9,0.390066,26442.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -BatchNorm8,1.54885,34110.6 -BatchNorm8_f2h,0,0 -BatchNorm8_h2f,0,0 -Add10,0.280256,26442.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu7,0.320128,26442.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Conv9,219.91,2.1566e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add11,0.410745,27254 -Add11_f2h,0,0 -Add11_h2f,0,0 -BatchNorm9,0.608754,29295.2 -BatchNorm9_f2h,0,0 -BatchNorm9_h2f,0,0 -Relu8,0.34229,27250 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv10,405.412,3.80103e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add12,0.423705,25999.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -BatchNorm10,0.627186,27026.2 -BatchNorm10_f2h,0,0 -BatchNorm10_h2f,0,0 -Relu9,0.341772,25999.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv11,274.518,2.45326e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add13,0.354445,25078.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -BatchNorm11,1.53738,33075 -BatchNorm11_f2h,0,0 -BatchNorm11_h2f,0,0 -Add14,0.283603,23765.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu10,0.315775,23769.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Conv12,128.551,1.20479e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add15,0.345157,25545.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -BatchNorm12,0.428863,25545.2 -BatchNorm12_f2h,0,0 -BatchNorm12_h2f,0,0 -Relu11,0.290988,25544.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv13,157.593,1.47273e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add16,0.404562,25638.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -BatchNorm13,0.441138,25627.2 -BatchNorm13_f2h,0,0 -BatchNorm13_h2f,0,0 -Relu12,0.307039,25631.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv14,147.479,1.33218e+06 -Conv14_f2h,0,0 -Conv14_h2f,0,0 -Add17,0.321491,25208.2 -Add17_f2h,0,0 -Add17_h2f,0,0 -BatchNorm14,0.897682,29482.6 -BatchNorm14_f2h,0,0 -BatchNorm14_h2f,0,0 -Conv15,189.674,1.72808e+06 -Conv15_f2h,0,0 -Conv15_h2f,0,0 -Add18,0.35511,25306.6 -Add18_f2h,0,0 
-Add18_h2f,0,0 -BatchNorm15,0.923762,29649 -BatchNorm15_f2h,0,0 -BatchNorm15_h2f,0,0 -Add19,0.288403,25333.6 -Add19_f2h,0,0 -Add19_h2f,0,0 -Relu13,0.314989,25326 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Conv16,151.332,1.38495e+06 -Conv16_f2h,0,0 -Conv16_h2f,0,0 -Add20,0.364755,25440 -Add20_f2h,0,0 -Add20_h2f,0,0 -BatchNorm16,0.45182,25413.6 -BatchNorm16_f2h,0,0 -BatchNorm16_h2f,0,0 -Relu14,0.30583,25436.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Conv17,242.979,2.15965e+06 -Conv17_f2h,0,0 -Conv17_h2f,0,0 -Add21,0.408454,24674.6 -Add21_f2h,0,0 -Add21_h2f,0,0 -BatchNorm17,0.454418,24693.6 -BatchNorm17_f2h,0,0 -BatchNorm17_h2f,0,0 -Relu15,0.323397,24697.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Conv18,135.456,1.18545e+06 -Conv18_f2h,0,0 -Conv18_h2f,0,0 -Add22,0.325971,24537.4 -Add22_f2h,0,0 -Add22_h2f,0,0 -BatchNorm18,0.928587,28905.8 -BatchNorm18_f2h,0,0 -BatchNorm18_h2f,0,0 -Add23,0.266848,24537.4 -Add23_f2h,0,0 -Add23_h2f,0,0 -Relu16,0.221516,24552.6 -Relu16_f2h,0,0 -Relu16_h2f,0,0 -Conv19,109.495,1.00602e+06 -Conv19_f2h,0,0 -Conv19_h2f,0,0 -Add24,0.352371,25164.8 -Add24_f2h,0,0 -Add24_h2f,0,0 -BatchNorm19,0.430892,25138.2 -BatchNorm19_f2h,0,0 -BatchNorm19_h2f,0,0 -Relu17,0.309568,25153.4 -Relu17_f2h,0,0 -Relu17_h2f,0,0 -Conv20,240.505,2.12346e+06 -Conv20_f2h,0,0 -Conv20_h2f,0,0 -Add25,0.405664,24427.2 -Add25_f2h,0,0 -Add25_h2f,0,0 -BatchNorm20,0.451731,24434.8 -BatchNorm20_f2h,0,0 -BatchNorm20_h2f,0,0 -Relu18,0.318406,24442 -Relu18_f2h,0,0 -Relu18_h2f,0,0 -Conv21,134.238,1.15705e+06 -Conv21_f2h,0,0 -Conv21_h2f,0,0 -Add26,0.328722,24259 -Add26_f2h,0,0 -Add26_h2f,0,0 -BatchNorm21,0.910079,28655 -BatchNorm21_f2h,0,0 -BatchNorm21_h2f,0,0 -Add27,0.244902,24259.2 -Add27_f2h,0,0 -Add27_h2f,0,0 -Relu19,0.208806,24278.2 -Relu19_f2h,0,0 -Relu19_h2f,0,0 -Conv22,109.363,995075 -Conv22_f2h,0,0 -Conv22_h2f,0,0 -Add28,0.359077,24915.2 -Add28_f2h,0,0 -Add28_h2f,0,0 -BatchNorm22,0.445318,24922.8 -BatchNorm22_f2h,0,0 -BatchNorm22_h2f,0,0 -Relu20,0.313215,24930.4 -Relu20_f2h,0,0 -Relu20_h2f,0,0 
-Conv23,238.499,2.08729e+06 -Conv23_f2h,0,0 -Conv23_h2f,0,0 -Add29,0.415065,24247.8 -Add29_f2h,0,0 -Add29_h2f,0,0 -BatchNorm23,0.465695,24263.4 -BatchNorm23_f2h,0,0 -BatchNorm23_h2f,0,0 -Relu21,0.322854,24275.2 -Relu21_f2h,0,0 -Relu21_h2f,0,0 -Conv24,131.667,1.13298e+06 -Conv24_f2h,0,0 -Conv24_h2f,0,0 -Add30,0.325343,24183.2 -Add30_f2h,0,0 -Add30_h2f,0,0 -BatchNorm24,0.904709,28612.4 -BatchNorm24_f2h,0,0 -BatchNorm24_h2f,0,0 -Add31,0.222137,24210.4 -Add31_f2h,0,0 -Add31_h2f,0,0 -Relu22,0.210669,24226.4 -Relu22_f2h,0,0 -Relu22_h2f,0,0 -Conv25,63.9229,584253 -Conv25_f2h,0,0 -Conv25_h2f,0,0 -Add32,0.287955,24537 -Add32_f2h,0,0 -Add32_h2f,0,0 -BatchNorm25,0.339839,24537 -BatchNorm25_f2h,0,0 -BatchNorm25_h2f,0,0 -Relu23,0.22014,24498.8 -Relu23_f2h,0,0 -Relu23_h2f,0,0 -Conv26,100.177,906648 -Conv26_f2h,0,0 -Conv26_h2f,0,0 -Add33,0.372223,24919.4 -Add33_f2h,0,0 -Add33_h2f,0,0 -BatchNorm26,0.365586,24907.8 -BatchNorm26_f2h,0,0 -BatchNorm26_h2f,0,0 -Relu24,0.279231,24911.6 -Relu24_f2h,0,0 -Relu24_h2f,0,0 -Conv27,76.8815,689259 -Conv27_f2h,0,0 -Conv27_h2f,0,0 -Add34,0.305549,24946.2 -Add34_f2h,0,0 -Add34_h2f,0,0 -BatchNorm27,0.564005,27250.8 -BatchNorm27_f2h,0,0 -BatchNorm27_h2f,0,0 -Conv28,116.268,1.06217e+06 -Conv28_f2h,0,0 -Conv28_h2f,0,0 -Add35,0.325055,25244.2 -Add35_f2h,0,0 -Add35_h2f,0,0 -BatchNorm28,0.683973,27606 -BatchNorm28_f2h,0,0 -BatchNorm28_h2f,0,0 -Add36,0.209746,25267 -Add36_f2h,0,0 -Add36_h2f,0,0 -Relu25,0.177894,25289.8 -Relu25_f2h,0,0 -Relu25_h2f,0,0 -Conv29,82.3367,767483 -Conv29_f2h,0,0 -Conv29_h2f,0,0 -Add37,0.316652,25480 -Add37_f2h,0,0 -Add37_h2f,0,0 -BatchNorm29,0.346803,25483.8 -BatchNorm29_f2h,0,0 -BatchNorm29_h2f,0,0 -Relu26,0.263852,25491.4 -Relu26_f2h,0,0 -Relu26_h2f,0,0 -Conv30,129.534,1.19781e+06 -Conv30_f2h,0,0 -Conv30_h2f,0,0 -Add38,0.371858,25514.4 -Add38_f2h,0,0 -Add38_h2f,0,0 -BatchNorm30,0.372434,25506.8 -BatchNorm30_f2h,0,0 -BatchNorm30_h2f,0,0 -Relu27,0.265414,25525.8 -Relu27_f2h,0,0 -Relu27_h2f,0,0 -Conv31,76.4658,705852 
-Conv31_f2h,0,0 -Conv31_h2f,0,0 -Add39,0.30581,25430.6 -Add39_f2h,0,0 -Add39_h2f,0,0 -BatchNorm31,0.585222,27823.2 -BatchNorm31_f2h,0,0 -BatchNorm31_h2f,0,0 -Add40,0.127334,25442 -Add40_f2h,0,0 -Add40_h2f,0,0 -Relu28,0.160691,25445.8 -Relu28_f2h,0,0 -Relu28_h2f,0,0 -Conv32,67.8801,645673 -Conv32_f2h,0,0 -Conv32_h2f,0,0 -Add41,0.317267,25816 -Add41_f2h,0,0 -Add41_h2f,0,0 -BatchNorm32,0.341324,25823.6 -BatchNorm32_f2h,0,0 -BatchNorm32_h2f,0,0 -Relu29,0.273017,25819.6 -Relu29_f2h,0,0 -Relu29_h2f,0,0 -Conv33,128.942,1.20726e+06 -Conv33_f2h,0,0 -Conv33_h2f,0,0 -Add42,0.375462,25739.4 -Add42_f2h,0,0 -Add42_h2f,0,0 -BatchNorm33,0.365759,25762.2 -BatchNorm33_f2h,0,0 -BatchNorm33_h2f,0,0 -Relu30,0.268441,25766 -Relu30_f2h,0,0 -Relu30_h2f,0,0 -Conv34,75.9416,688671 -Conv34_f2h,0,0 -Conv34_h2f,0,0 -Add43,0.312371,25720.4 -Add43_f2h,0,0 -Add43_h2f,0,0 -BatchNorm34,0.55925,28139.6 -BatchNorm34_f2h,0,0 -BatchNorm34_h2f,0,0 -Add44,0.135948,25724.2 -Add44_f2h,0,0 -Add44_h2f,0,0 -Relu31,0.157631,25731.8 -Relu31_f2h,0,0 -Relu31_h2f,0,0 -Conv35,68.6611,660372 -Conv35_f2h,0,0 -Conv35_h2f,0,0 -Add45,0.313395,26095.8 -Add45_f2h,0,0 -Add45_h2f,0,0 -BatchNorm35,0.363064,26103.4 -BatchNorm35_f2h,0,0 -BatchNorm35_h2f,0,0 -Relu32,0.273715,26091.8 -Relu32_f2h,0,0 -Relu32_h2f,0,0 -Conv36,128.796,1.21921e+06 -Conv36_f2h,0,0 -Conv36_h2f,0,0 -Add46,0.375922,26038.2 -Add46_f2h,0,0 -Add46_h2f,0,0 -BatchNorm36,0.369222,26045.8 -BatchNorm36_f2h,0,0 -BatchNorm36_h2f,0,0 -Relu33,0.278444,26045.8 -Relu33_f2h,0,0 -Relu33_h2f,0,0 -Conv37,75.7666,717855 -Conv37_f2h,0,0 -Conv37_h2f,0,0 -Add47,0.297465,25915.8 -Add47_f2h,0,0 -Add47_h2f,0,0 -BatchNorm37,0.538463,28353.6 -BatchNorm37_f2h,0,0 -BatchNorm37_h2f,0,0 -Add48,0.13081,25927.2 -Add48_f2h,0,0 -Add48_h2f,0,0 -Relu34,0.159641,25927.2 -Relu34_f2h,0,0 -Relu34_h2f,0,0 -Conv38,68.59,660503 -Conv38_f2h,0,0 -Conv38_h2f,0,0 -Add49,0.315603,26301.2 -Add49_f2h,0,0 -Add49_h2f,0,0 -BatchNorm38,0.3467,26278 -BatchNorm38_f2h,0,0 -BatchNorm38_h2f,0,0 
-Relu35,0.27127,26289.4 -Relu35_f2h,0,0 -Relu35_h2f,0,0 -Conv39,129.905,1.23748e+06 -Conv39_f2h,0,0 -Conv39_h2f,0,0 -Add50,0.376045,26129.4 -Add50_f2h,0,0 -Add50_h2f,0,0 -BatchNorm39,0.373753,26133.2 -BatchNorm39_f2h,0,0 -BatchNorm39_h2f,0,0 -Relu36,0.284358,26140.8 -Relu36_f2h,0,0 -Relu36_h2f,0,0 -Conv40,75.3331,714353 -Conv40_f2h,0,0 -Conv40_h2f,0,0 -Add51,0.312057,26099 -Add51_f2h,0,0 -Add51_h2f,0,0 -BatchNorm40,0.612383,28529.2 -BatchNorm40_f2h,0,0 -BatchNorm40_h2f,0,0 -Add52,0.130508,26083.8 -Add52_f2h,0,0 -Add52_h2f,0,0 -Relu37,0.166067,26099 -Relu37_f2h,0,0 -Relu37_h2f,0,0 -Conv41,69.9504,680018 -Conv41_f2h,0,0 -Conv41_h2f,0,0 -Add53,0.322118,26458.6 -Add53_f2h,0,0 -Add53_h2f,0,0 -BatchNorm41,0.357381,26462.4 -BatchNorm41_f2h,0,0 -BatchNorm41_h2f,0,0 -Relu38,0.276204,26470 -Relu38_f2h,0,0 -Relu38_h2f,0,0 -Conv42,129.6,1.23621e+06 -Conv42_f2h,0,0 -Conv42_h2f,0,0 -Add54,0.378489,26309 -Add54_f2h,0,0 -Add54_h2f,0,0 -BatchNorm42,0.366976,26312.8 -BatchNorm42_f2h,0,0 -BatchNorm42_h2f,0,0 -Relu39,0.274348,26328.2 -Relu39_f2h,0,0 -Relu39_h2f,0,0 -Conv43,75.2967,719905 -Conv43_f2h,0,0 -Conv43_h2f,0,0 -Add55,0.310246,26263.4 -Add55_f2h,0,0 -Add55_h2f,0,0 -BatchNorm43,0.589254,28762.6 -BatchNorm43_f2h,0,0 -BatchNorm43_h2f,0,0 -Add56,0.129183,26278.8 -Add56_f2h,0,0 -Add56_h2f,0,0 -Relu40,0.157555,26286.4 -Relu40_f2h,0,0 -Relu40_h2f,0,0 -Conv44,36.9811,371583 -Conv44_f2h,0,0 -Conv44_h2f,0,0 -Add57,0.281792,26466 -Add57_f2h,0,0 -Add57_h2f,0,0 -BatchNorm44,0.229702,26450.6 -BatchNorm44_f2h,0,0 -BatchNorm44_h2f,0,0 -Relu41,0.154208,26439 -Relu41_f2h,0,0 -Relu41_h2f,0,0 -Conv45,86.571,848544 -Conv45_f2h,0,0 -Conv45_h2f,0,0 -Add58,0.324044,26657.4 -Add58_f2h,0,0 -Add58_h2f,0,0 -BatchNorm45,0.326054,26657.4 -BatchNorm45_f2h,0,0 -BatchNorm45_h2f,0,0 -Relu42,0.263474,26660.8 -Relu42_f2h,0,0 -Relu42_h2f,0,0 -Conv46,42.9393,428148 -Conv46_f2h,0,0 -Conv46_h2f,0,0 -Add59,0.265369,26725.2 -Add59_f2h,0,0 -Add59_h2f,0,0 -BatchNorm46,0.575724,30517.4 -BatchNorm46_f2h,0,0 
-BatchNorm46_h2f,0,0 -Conv47,87.2954,864913 -Conv47_f2h,0,0 -Conv47_h2f,0,0 -Add60,0.29422,27020 -Add60_f2h,0,0 -Add60_h2f,0,0 -BatchNorm47,0.596255,29568.6 -BatchNorm47_f2h,0,0 -BatchNorm47_h2f,0,0 -Add61,0.122092,27020.2 -Add61_f2h,0,0 -Add61_h2f,0,0 -Relu43,0.160172,27020.2 -Relu43_f2h,0,0 -Relu43_h2f,0,0 -Conv48,52.3917,528531 -Conv48_f2h,0,0 -Conv48_h2f,0,0 -Add62,0.281618,27346.6 -Add62_f2h,0,0 -Add62_h2f,0,0 -BatchNorm48,0.32663,27331.4 -BatchNorm48_f2h,0,0 -BatchNorm48_h2f,0,0 -Relu44,0.224127,27331.4 -Relu44_f2h,0,0 -Relu44_h2f,0,0 -Conv49,91.6831,928447 -Conv49_f2h,0,0 -Conv49_h2f,0,0 -Add63,0.330559,27327.4 -Add63_f2h,0,0 -Add63_h2f,0,0 -BatchNorm49,0.338815,27327.4 -BatchNorm49_f2h,0,0 -BatchNorm49_h2f,0,0 -Relu45,0.259699,27327.4 -Relu45_f2h,0,0 -Relu45_h2f,0,0 -Conv50,42.0593,428805 -Conv50_f2h,0,0 -Conv50_h2f,0,0 -Add64,0.264121,27467.8 -Add64_f2h,0,0 -Add64_h2f,0,0 -BatchNorm50,0.521874,30047 -BatchNorm50_f2h,0,0 -BatchNorm50_h2f,0,0 -Add65,0.111501,27467.8 -Add65_f2h,0,0 -Add65_h2f,0,0 -Relu46,0.144992,27467.8 -Relu46_f2h,0,0 -Relu46_h2f,0,0 -Conv51,55.3189,566082 -Conv51_f2h,0,0 -Conv51_h2f,0,0 -Add66,0.286432,27642.8 -Add66_f2h,0,0 -Add66_h2f,0,0 -BatchNorm51,0.326393,27642.8 -BatchNorm51_f2h,0,0 -BatchNorm51_h2f,0,0 -Relu47,0.215097,27627.6 -Relu47_f2h,0,0 -Relu47_h2f,0,0 -Conv52,91.6897,930911 -Conv52_f2h,0,0 -Conv52_h2f,0,0 -Add67,0.331532,27715.4 -Add67_f2h,0,0 -Add67_h2f,0,0 -BatchNorm52,0.346547,27669.6 -BatchNorm52_f2h,0,0 -BatchNorm52_h2f,0,0 -Relu48,0.262815,27677.2 -Relu48_f2h,0,0 -Relu48_h2f,0,0 -Conv53,42.7118,442905 -Conv53_f2h,0,0 -Conv53_h2f,0,0 -Add68,0.264089,27745.6 -Add68_f2h,0,0 -Add68_h2f,0,0 -BatchNorm53,0.51813,30339.8 -BatchNorm53_f2h,0,0 -BatchNorm53_h2f,0,0 -Add69,0.109933,27745.6 -Add69_f2h,0,0 -Add69_h2f,0,0 -Relu49,0.148806,27745.6 -Relu49_f2h,0,0 -Relu49_h2f,0,0 -Pool2,0.74028,27745.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Mul1,1.56576,38195.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add70,0.138682,27757 -Add70_f2h,0,0 -Add70_h2f,0,0 
-Softmax1,11.7675,135069 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_120.txt deleted file mode 100644 index 35b105d97a68bcbfbff43da403a925200b5f5382..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,111.207,1.12166e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212219,14227.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.174597,14218.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,576.179,5.78929e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.227266,14260.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.175426,14254.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.16613,56288.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,192.271,1.94297e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208485,14757.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.125772,14751.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,316.57,3.39462e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216479,15116.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.172252,15107.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.36676,43823.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,109.625,1.16678e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.212504,15340.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.15622,15340.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,190.588,2.11006e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206818,15480.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.157071,15465.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,190.242,2.12378e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.215202,15600.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159916,15602.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.16016,31188.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,72.6437,816386 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.202409,15848.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147737,15841.1 -Relu8_f2h,0,0 
-Relu8_h2f,0,0 -Conv9,132.01,1.54934e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.206082,16210.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.147573,16212.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,132.995,1.58373e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.207378,16535.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149116,16527.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33644,18180.1 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.9618,814281 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.171717,16353.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.139711,16343.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.7829,804755 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.159362,16150.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133334,16143.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.6739,792408 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.161289,15975.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.143932,15960.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.352235,15960.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.433457,17406.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0949944,15952.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0750587,15952.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.207237,15889.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0552669,15878.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.30158,18260 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_151.txt deleted file mode 100644 index 8280ad73a88b4ca9a6b56c4118210a280283cd5e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,109.883,1.04254e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.215451,13361.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165695,13375.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,285.834,2.77915e+06 -Conv2_f2h,0,0 
-Conv2_h2f,0,0 -Add2,0.222789,13616.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172303,13622.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.30943,55247.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,110.169,1.06864e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210159,14139.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.168447,14147.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,163.861,1.67941e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217589,14343.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.170895,14335.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.42049,42315.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,64.8149,666999 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.203035,14561.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152188,14567.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,98.8989,1.05592e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208495,14710.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15037,14703.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,99.2197,1.06738e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.208639,14860.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.147836,14862.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.03008,29737.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.8777,528973 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189513,14951 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.146172,14951 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.707,1.19456e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.18853,14881.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.151094,14866.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,110.228,1.19521e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.184844,14846.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.143065,14838.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29591,14838.5 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.6806,488767 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.160754,14734.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.131202,14734.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.6508,457221 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.143548,14620.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.130312,14620.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.437,452738 -Conv13_f2h,0,0 
-Conv13_h2f,0,0 -Add13,0.143321,14506.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.125548,14506.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.260923,14506.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.344148,15706.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0640094,14502.5 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0617083,14502.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.16853,14494.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0530205,14483.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.21648,15206.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_152.txt deleted file mode 100644 index 2bb5c473d4584e3193f3b3390053700b27506565..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,108.529,1.02679e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.209208,13383.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167855,13359.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,282.701,2.74934e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221096,13657.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172502,13660.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.22041,55982 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,109.437,1.06802e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215816,14157.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172437,14164.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,162.933,1.66082e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21342,14348.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.173186,14356.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.29587,43105.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,64.4613,665724 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.204095,14601.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.156335,14593.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,98.4478,1.05503e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 
-Add6,0.204312,14733.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.154844,14728.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,99.1785,1.06485e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.206322,14865.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.149935,14849.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.0148,29706.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.9101,529182 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.1767,14974 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14277,14974 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.931,1.20504e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.184658,14880.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.14318,14865 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,110.398,1.18763e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.186411,14836.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.151529,14821.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.28786,14821.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.2478,471643 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.159807,14755.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.135913,14736.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.6118,444300 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.148268,14631.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.129462,14628.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.4124,440481 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.138815,14516 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.126019,14514.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.25027,14514.1 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.343537,15719.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0633243,14510.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0601566,14508.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.169481,14500.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0528224,14491.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.28432,18006.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_153.txt deleted file mode 100644 
index 2fc0ebfbe5c2e0fe96637b22efcecbcc79d1ac13..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,98.1298,916184 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.214213,13206.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.169397,13212.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,274.817,2.6343e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.233183,13445.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.175609,13449.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.15712,54522.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,108.185,1.02872e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.224367,13944.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172946,13939 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,162.697,1.63629e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21757,14147.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.169247,14139.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.32408,42447.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,64.3237,648420 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206869,14355.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152015,14351.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,98.5349,1.03792e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20861,14496.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15237,14491 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,99.0745,1.04968e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.21197,14616.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154018,14616.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.93814,29234.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,49.0738,521483 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.184223,14739.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.150082,14739.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,110.77,1.19663e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.190623,14672 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145014,14666.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,111.379,1.19415e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.19268,14609.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.144002,14609.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 
-Pool4,1.2826,14609.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.1364,466132 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.167301,14554.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.129804,14546.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.7482,440811 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147442,14454.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.135721,14450.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.3888,436274 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.145516,14360.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.12935,14360.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.260203,14360.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.330046,15549.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0703355,14356.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0632124,14356.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.179161,14347.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0531199,14341.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.23427,18637 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_154.txt deleted file mode 100644 index d1e7a3a32ff94dcbad5f9812f10e034afca71201..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,98.9913,918542 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.218239,13141.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165186,13143.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,275.986,2.62748e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.2159,13389.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.174021,13383.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.20344,54275.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,107.914,1.03712e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213314,13875.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.167669,13881.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,162.166,1.62723e+06 
-Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212911,14090 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.170021,14074.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.38287,40823.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,64.6072,652231 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205071,14320.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.150616,14320.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,98.8683,1.04122e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.202149,14464.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15302,14462.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,99.1482,1.0451e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.205432,14599.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.151234,14580.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.09322,29160.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,49.348,523166 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.181372,14688.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.142569,14681.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,110.688,1.18679e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.179871,14635.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.143346,14628.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,111.263,1.18976e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.190357,14592.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.150431,14592.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.26548,14594.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.5816,477628 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.154038,14527.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.134607,14502.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,42.0021,452542 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.143509,14418.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.128159,14402.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.8429,448163 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.143912,14330.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.12701,14328.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.264712,14328.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.340673,15511.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0637851,14320.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0602874,14320.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.169948,14301.5 
-Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0527903,14290.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.25248,16424.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_155.txt deleted file mode 100644 index 6e417397af8fb9593a07eca1c6bb12065668dc51..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,119.079,1.13372e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.210005,13453.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.188552,13430.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,355.973,3.45901e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220786,13700.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176869,13700.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.35758,54877.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,139.06,1.36761e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21308,14299.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176134,14282.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,218.057,2.25294e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214319,14592.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.171458,14604.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.30404,43091.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,77.7244,808220 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206946,14810.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.156415,14810.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,127.822,1.38616e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.207512,14921.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156837,14923.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,129.446,1.38598e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.20501,14961.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.157324,14967.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.08451,29946.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.0753,522084 -Conv8_f2h,0,0 -Conv8_h2f,0,0 
-Add8,0.187858,15131.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.145384,15124.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,107.055,1.18645e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.181317,15033.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145423,14263.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,107.51,1.17693e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.185138,14993 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.147769,14985.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.30975,14985.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.3652,492728 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.150988,14892.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.132361,14880.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.8004,465073 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.138956,14770.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.131059,14763.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.7483,459855 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.147753,14649.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.130773,14647.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.263538,14645.5 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.329131,15904.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0637437,14643.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0625727,14643.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.1695,14649.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0540413,14639.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.22707,16820 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_156.txt deleted file mode 100644 index a7a3c079d38e5f5f638c5c65b5f92b01dce1533d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,117.965,1.12843e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212223,13545.1 -Add1_f2h,0,0 -Add1_h2f,0,0 
-Relu1,0.177131,13547 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,352.332,3.44013e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.222315,13788.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.178828,13792.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.33666,55900.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,144.329,1.42745e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.217807,14370.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.167439,14361.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,227.662,2.37447e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214821,14753.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.17828,14743.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.35686,44256.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,76.6474,799817 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205151,14973.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.154582,14958.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,125.967,1.37685e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208716,15044.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.154658,15046.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,127.389,1.3932e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.208715,15119.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156092,15113.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.20968,30228.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.34,738285 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.188687,15261.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.149209,15254 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,151.955,1.6812e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.198655,15126.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.148448,15126.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,152.453,1.67297e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198367,15084.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.151446,15084.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.3114,15835.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.612,480765 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.162652,14993 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.135993,14977.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.4848,451786 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.140524,14873.7 -Add12_f2h,0,0 -Add12_h2f,0,0 
-Relu12,0.130396,14864.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.5455,449157 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.145813,14771.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13126,14771.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.262312,14767.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.317774,16064.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0628124,14767.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0632797,14767.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.169733,14763.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0537853,14727.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.25415,15435.2 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_157.txt deleted file mode 100644 index df8e948520da6185160850f198075fed6ed2d31d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,118.591,1.14231e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.211477,13518.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.175544,13526 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,365.717,3.56044e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223941,13767.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172991,13771.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.27455,55127.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,144.824,1.42982e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210773,14349.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.170319,14359.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,228.204,2.36499e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217378,14718.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.177535,14732.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.40404,44209.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,84.5471,887202 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.209512,14953.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152454,14954.8 
-Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,136.417,1.49071e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.207992,15110.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.154435,15102.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,136.919,1.49826e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.213855,15262.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154204,15228.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.13481,30471.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.3913,730502 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.194143,15355.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.146505,15347.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,150.66,1.67668e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.194239,15221.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146274,15223.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,151.261,1.66942e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198802,15159.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.141375,15161.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.31095,15161.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.2387,742398 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.166943,15040.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143394,15040.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.694,737648 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.163673,14898.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136943,14146.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.5494,726618 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.159468,14763.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.140969,14756.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.335684,14756.1 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.437198,16068.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0864414,14754.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.068441,14754.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.203656,14763.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0602685,14727.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.37626,16915.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_158.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_158.txt deleted file mode 100644 index 48144ad26dc7dc140a5edfa180c479cc59d88769..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,106.983,1.00411e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.210629,13238.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172882,13242 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,344.771,3.29724e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220501,13510.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169097,13498.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.22127,53964.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,135.473,1.31689e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.221522,14040.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.165369,14029 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,213.54,2.19071e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216853,14399.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.172425,14388.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.35143,43187.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,77.0486,796273 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208639,14597.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.150649,14599.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,127.247,1.35788e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.212402,14722.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156031,14718.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,129.02,1.37963e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.207794,14768.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.153868,14762.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.10291,29534 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.8761,521961 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.183522,14914.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151078,14899.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.464,1.19365e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.182351,14795.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.144255,14795.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,109.675,1.18545e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.200335,14763.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.153033,14755.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29705,14757.7 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.2376,470940 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.160943,14658.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.133474,14651.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.5273,441563 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147081,14588.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.131423,14588.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.3788,438782 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.14229,14470.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.127119,14469 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.266085,14469 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.322078,15695.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0704924,14467.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0651867,14467.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.183628,14461.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0578333,14451.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.25522,16626.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_159.txt deleted file mode 100644 index a7a8b5d9a3ec2fabec459d839011db147bfc6740..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,107.189,1.00849e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.207365,13280.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.17292,13284 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,343.892,3.28643e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217768,13544.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169167,13527.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.25727,52111.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,142.501,1.38264e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210776,14105 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.181512,13389.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,226.638,2.31586e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.215717,14469.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164821,14454.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.40595,43400.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,77.7878,803696 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.198363,14667.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.147814,14661.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,128.387,1.37552e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.203436,14735.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.158428,14739.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,129.231,1.38054e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.204719,14786.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.155094,14771.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.13345,28803.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.8006,717605 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.191442,14937.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.144985,14929.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,150.439,1.62404e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.192338,14866.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.155097,14851.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.623,1.62379e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.196022,14853 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.153343,14853 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33276,14853 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,45.0171,486081 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.155516,14746.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.136383,14738.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.9719,460951 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.146412,14617.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132364,14617.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,42.0338,458946 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.140834,14546.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128191,14529.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.254411,14527.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 
-Mul1,0.338062,15790.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.064758,14523.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0617914,14531.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.1691,14511.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0533373,14488.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.28845,17263.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_160.txt deleted file mode 100644 index 00bd498faf8a57e71a8921395f24aeabc40f2c3d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,107.49,999074 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.213132,13320.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.174021,13322 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,354.074,3.36895e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.228753,13538.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.167845,13544.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.29692,54894.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,140.792,1.36179e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.219112,14078.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172956,14059.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,224.316,2.25972e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21981,14418.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.169202,14426.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.43881,43309 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,83.9515,863295 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205323,14653.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.146229,14648 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,136.011,1.44758e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20973,14823.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.152873,14802.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,137.215,1.4667e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212818,14941 -Add7_f2h,0,0 -Add7_h2f,0,0 
-Relu7,0.152124,14942.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.17019,29893.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.5767,708698 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189967,15086.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14302,15080.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,148.706,1.62724e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.200789,14988.5 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145301,14988.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,148.989,1.62081e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198015,14995 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148131,14987.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29803,14979.7 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.4482,733743 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.171928,14878.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.139219,14871.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.67,728621 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.162904,14746.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136966,14731.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.448,719795 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.160841,14595.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.150066,14595.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.342087,14595.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.438522,15877.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0986266,14595.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0763738,14595.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.22044,14593.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0572574,14555.3 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.33809,18173.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_161.txt deleted file mode 100644 index feb7f4fb58c604f0fefaf03df971cae5992546a6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_161.txt 
+++ /dev/null @@ -1,150 +0,0 @@ -Conv1,122.356,1.18071e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.217269,13550.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.177503,13548.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,382.136,3.71194e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231176,13771.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.170626,13773.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.51154,59313 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.62,1.50244e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216216,14278.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.166966,14273.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,240.599,2.48672e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.218568,14606.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.168764,14608 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.50083,43849.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,85.4118,885302 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205653,14847.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.157608,14850 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,137.528,1.47878e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.215291,14998.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.16191,14996.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.46,1.50439e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.213874,15129.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.158985,15131.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.18009,30274.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.5398,716742 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.192725,15286.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.148821,15278.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,147.328,1.63476e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.196587,15200.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.152191,15200.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,147.655,1.62946e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.203564,15188.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146652,15181.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33757,15181.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,45.1015,498280 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.165846,15063.3 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137698,15063.3 
-Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.8254,467945 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.14821,14955.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.129154,14955.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.5928,464354 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.139353,14848.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128457,14831.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.263067,14827.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.310286,16138.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0623098,14820 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0601242,14820 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.167861,14814.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0523582,14806.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.29183,15558.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_162.txt deleted file mode 100644 index 9b5c953995fcee3f5a87294d58c0d8bfbd03b7b5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,121.846,1.16738e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212412,13516.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.178789,13514.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,384.141,3.7133e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231051,13746.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.177932,13742.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.45027,57081.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.459,1.50471e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.214725,14267 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.171525,14267 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,240.892,2.49179e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.218776,14586.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.17662,14581 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.45359,43766.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 
-Conv5,85.7184,890846 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.209708,14837.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.15669,14821.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,137.968,1.48479e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.210277,15003.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156783,14996.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.221,1.48677e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212373,15138.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159698,14358.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.20662,28739 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.4394,734068 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.194104,15259.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.152735,15254 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,152.01,1.6804e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.196335,15152.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.144492,15145.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,152.442,1.67338e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198825,15090.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149068,15090.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32739,15854.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.8544,486266 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.158716,14975.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.136873,14976.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.6245,452680 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.141148,14890.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132601,14873.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.7973,451642 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.144862,14769.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.130998,14769.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.263745,14765.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.308542,16078 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0646363,14759.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0626012,14759.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.17296,14737.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.054352,14733.3 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.29299,16198.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_163.txt deleted file mode 100644 index d440002ea739909721ab71f6a2637e3e4e5e43ae..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,123.074,1.16249e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.209051,13398.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.191187,13396.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,387.127,3.70547e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.228162,13622.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.177708,13624.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.64827,60156.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.745,1.47919e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213951,14199.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.161765,14184.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,241.52,2.48585e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216591,14533.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.173689,14537.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.50728,44349.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,85.5884,886377 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.203624,14767.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.154396,14769.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,137.718,1.48781e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20637,14946.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.153317,14931.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.826,1.50322e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.215826,15089.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154261,15081.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.24472,30148 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.0749,727506 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.203157,15219.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151103,15219.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,152.012,1.66178e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-Add9,0.195445,15084.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.144671,15084.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,152.243,1.66198e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.20133,15028 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.145701,15012.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.36108,15768 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.7113,740849 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.174435,14920.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137577,14913.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.9715,735257 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.161385,14763.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133417,14752.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.9213,729651 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.161471,14637.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.133977,14637.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.353121,14637.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.437642,15954.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0868186,14630.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0680221,14630.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.198892,14607.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0548159,14590.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.32261,17525.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_164.txt deleted file mode 100644 index 97bdfb6ee438be0723f2e206b035d0718c2e3c8a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,122.304,1.16425e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.22244,13396.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167529,13398.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,385.788,3.69328e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.233774,13607.5 -Add2_f2h,0,0 -Add2_h2f,0,0 
-Relu2,0.177596,13592.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.62557,59661.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,154.367,1.49639e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218431,14122 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.16101,14120.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,242.416,2.46897e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212968,14442.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.170434,14446.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.59689,43382.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,86.0534,891989 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.216181,14680.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.151864,14672.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,138.333,1.48603e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.21461,14840.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.152239,14844.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,139.12,1.49611e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.210792,14988.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154162,14980.5 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.24126,29963 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,63.7203,690521 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.194194,15122.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.145977,15122.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,145.538,1.60083e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.199887,15051.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149733,15045.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,146.134,1.59581e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.196728,15021.7 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.147234,15014 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.35989,15765.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.6506,741016 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.172299,14933.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137385,14909.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,67.0314,732208 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.167451,14757.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.14326,14742.5 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.6426,726541 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.158204,14609.2 -Add13_f2h,0,0 -Add13_h2f,0,0 
-Relu13,0.131308,14609.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.346862,14607.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.433082,15904.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0896701,14603.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0712543,14603.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.204255,14599.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.054438,14586.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.31779,16777.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_165.txt deleted file mode 100644 index 9fa8b59e7c6a0f489ea8f6e979c0624c852a1fa5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,112.873,1.0638e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.213458,13339.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.168226,13337.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,377.315,3.59071e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.230875,13527.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.177864,13520.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.50336,56198.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,150.829,1.45033e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.221214,14030.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.163929,14032.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,237.978,2.41404e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213643,14351.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164613,14355.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.60345,43812.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,84.4675,871314 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.202239,14601 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.141782,14589.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,136.621,1.45704e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.209455,14786 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.160031,14765.3 -Relu6_f2h,0,0 
-Relu6_h2f,0,0 -Conv7,137.759,1.47485e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212015,14933.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.15494,14936.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.1892,29872.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.57,713100 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.192287,15038.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14533,15030.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,149.752,1.63643e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.199592,14973.5 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.144655,14958.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.309,1.63364e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.202968,14940.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.151817,14940.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33758,16438.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.663,477395 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.159298,14856.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.135266,14848.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.7325,451800 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.14446,14759.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.130627,14757.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.5139,440631 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.142671,14641.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128082,14632.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.267195,14632.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.318507,15929.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0672828,14628.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0653947,14628.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.180437,14634.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0546011,14613 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.29277,17523.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_166.txt deleted file mode 100644 index 
c02af1e8401a6cec2ec4354272dc9abd0e36d20f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,113.923,1.0714e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.224281,13198.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172939,13207.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,377.565,3.55303e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.232702,13412.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.183432,13402.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.79093,59497.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,152.642,1.46039e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218856,13923.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.170549,13925.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,242.518,2.43482e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.217657,14219.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.175279,14208 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.61946,42671.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,86.8015,876756 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.210207,14456.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.153731,14458.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,139.051,1.45895e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.214408,14599.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.158703,14603.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,140.091,1.48103e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.213349,14755.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.157353,14753.5 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.24246,29509 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.5266,720106 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189298,14898 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.149983,14892.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,151.07,1.63582e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.198152,14828.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.143526,14830.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,151.253,1.62572e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.195547,14815 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146559,14813.1 -Relu10_f2h,0,0 
-Relu10_h2f,0,0 -Pool4,1.35976,17048.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.7287,483954 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.158703,14708.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.128092,14708.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,42.4148,463991 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.145136,14572.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.130265,14580.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,42.0307,456143 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.147618,14513.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.12759,14495.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.263502,14494 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.354842,15770.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0722334,14484.5 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.060207,14492.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.169365,14484.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0524412,14459.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.30475,14432.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_167.txt deleted file mode 100644 index ac8c32b7b4bd1794f0611a7602470a84e6e113dd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,112.969,1.0537e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.224619,13171.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.171842,13175.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,375.282,3.5406e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231525,13425.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.185541,13414 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.53881,57140.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,148.81,1.4295e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.226965,13961.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172766,13948.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 
-Conv4,237.478,2.39285e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.222108,14295.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164779,14307.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.47277,42946.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,85.8913,877468 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.209499,14525.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.144451,14519.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,137.789,1.46302e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.214536,14685 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.159196,14685 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,138.859,1.47172e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.215512,14798.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.153186,14802.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.18734,29612.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.1955,706900 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.199449,14958.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.143065,14958.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,149.895,1.62668e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.198046,14870.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.143423,14855.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.421,1.61886e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.204549,14841.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.14949,14833.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.35309,16317.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.6529,729839 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.170831,14737 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.14078,14737 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.8327,724828 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.162245,14603.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.138508,14603.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.8383,719891 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.161839,14465.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13192,14454 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.33868,14454 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.441764,15755 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.100489,14455.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0759517,14463.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 
-Mul2,0.220239,14450.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0604093,14446.3 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.31403,18772.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_168.txt deleted file mode 100644 index 33036bcf77a68012a02a3338ee2c58a69038a499..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,111.324,1.03407e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.215211,13171.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165667,13175 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,374.938,3.54103e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231886,13425.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.178543,13421.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.77089,59207.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,149.667,1.43127e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216287,13948 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.159698,13957.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,237.146,2.39731e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214361,14282.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.171519,14288.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.53431,42885.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,84.1279,858904 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.215365,14519.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152021,14498.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,136.524,1.4466e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206584,14678.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155285,14680.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,137.591,1.46436e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.211666,14814.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.149423,14809.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.19634,29624.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,64.4376,688748 -Conv8_f2h,0,0 
-Conv8_h2f,0,0 -Add8,0.192901,14929.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14534,14172.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,147.632,1.60435e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.19077,14868.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.13885,14852.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,148.083,1.59761e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197036,14846.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.142261,14831 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.35841,16301.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.7453,732092 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.174658,14725.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.133679,14716 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,67.1865,715178 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.161689,14611.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133145,14603.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.9726,713842 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.159023,14458.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.131705,14446.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.338762,14446.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.415287,15740.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0879354,14450.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0723069,14458.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.202501,14442.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0578684,14438.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.34202,18015.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_261.txt deleted file mode 100644 index 93c47b97c816b1c1dc19a5400efa40dc506985d3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,43.1987,439477 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.185701,14185.3 -Add1_f2h,0,0 -Add1_h2f,0,0 
-Relu1,0.167212,14173.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,365.047,3.69466e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.213368,14294.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.166652,14290.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.62853,62022.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,109.777,1.10721e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.203845,14693.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.177138,14689.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,199.71,2.12246e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.207055,14822.3 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.169183,14826.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.47566,43762.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,80.3271,858992 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197515,15103.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.148857,15088.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,132.428,1.46241e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.198482,15177.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.149909,15139.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,133.266,1.4725e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.203231,15258.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.152713,15246.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.05306,30494.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,56.6371,629761 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.183782,15307.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.143042,15307.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,96.762,1.08074e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.192254,15468.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.144018,15463.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,97.3713,1.09327e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.1935,15628.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.143263,15621 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.37687,16416.1 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,29.2038,337845 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.159132,15690.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.131049,15690.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,27.1636,320391 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147283,15740.2 -Add12_f2h,0,0 -Add12_h2f,0,0 
-Relu12,0.127967,15717.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,27.0862,320402 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.14783,15795.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128261,15795.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.273925,15795.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.336792,17109.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0655929,15788 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0629178,15788 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.172825,15786.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0526299,15763.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.3047,15743.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_262.txt deleted file mode 100644 index 3068a697954c6bf4d9f86b4a3d24ec31b7d4a496..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,43.6399,444116 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.182037,14215.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.168831,14215.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,363.235,3.70204e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217743,14335 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.186277,14331.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.69022,63146.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,109.704,1.1038e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207161,14740.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.177455,14744.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,199.947,2.10971e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209439,14830 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.180335,14826.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.53269,44510.5 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,80.5834,859763 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197746,15133.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.156879,15135.8 -Relu5_f2h,0,0 
-Relu5_h2f,0,0 -Conv6,132.891,1.47015e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20717,15185.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.158905,15181.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,133.429,1.45962e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.205048,15255.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159791,15253.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.08726,30513.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,56.6426,630938 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.184066,15349.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.150946,15326.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,96.6681,1.09416e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.186546,15497.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149449,15492.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,97.1915,1.10773e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.187295,15642 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.150443,15644 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.28879,15646 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,29.1626,334532 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.160447,15723.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.139119,15708.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,27.0591,320761 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.150464,15761.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132024,15761.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,27.0213,321893 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.141298,15840.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.130947,15817.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.274712,15809.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.32196,17131.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0674812,15800.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0629949,15792.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.175653,15792.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.057062,15777.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.27115,19731.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_263.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_263.txt deleted file mode 100644 index 2b5bf1c1e37cf8db751dc4bb033bde819d2bb3ba..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,52.6999,546384 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198037,14537.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167414,14533.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,526.24,5.46434e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.212491,14757.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.167157,14757.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.60362,63177.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,155.518,1.62047e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209423,15167.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.173631,15165.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,290.087,3.16438e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212972,15275.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.175055,15281.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.51486,45826.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,112.17,1.24284e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197132,15611.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.155762,15611 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,183.325,2.06789e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206267,15624.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.157141,14829.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,183.851,2.06724e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.20941,15653.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.161016,15644.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.16421,30495 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.3042,765185 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.187221,15779.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147635,15771.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,118.119,1.37467e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.200239,15964.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145609,15968.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,118.979,1.3983e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197608,16191.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.157228,16195 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33445,17012.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.8238,470470 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.177077,16258.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.142783,16250.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,38.458,463975 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.157321,16277.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.135417,16277.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.7033,466826 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.158914,16259.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.137087,16236.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.328145,16236.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.391175,17681.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0729914,16228.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0665116,16228.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.187836,16196 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.054979,16178.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.31858,16989 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_264.txt deleted file mode 100644 index 6c046346913f021ca6bd916ed293565c7fabbf7a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,52.9083,548904 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.188988,14577 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167944,14565.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,527.515,5.47719e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.223637,14765 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.184175,14763.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.67222,65025.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,155.107,1.62576e+06 
-Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211368,15163.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.182633,15165.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,289.121,3.155e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210738,15292.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.177333,15298.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.62499,45132 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,111.879,1.23559e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.198725,15603.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.155925,15597.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,182.352,2.07077e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.205867,15653.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.162169,15649.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,183.248,2.07693e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.209864,15688.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.16077,15674.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.18998,30561.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.1793,763391 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.187416,15790.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151935,15787.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,118.002,1.37394e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.197512,16029 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149721,16000.5 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,119.32,1.40387e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197349,16223.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.155624,16208.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.36869,16208.5 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.8666,472541 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.166613,16269.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.145027,16254.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,38.6649,469343 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.155164,16289.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.140386,16251 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.7998,473514 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.15557,16303.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.140963,16280.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.321259,16280.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.363988,17724 
-Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0740189,16270.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0704318,16255.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.184969,16247.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0610524,16228.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.31395,17837.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_265.txt deleted file mode 100644 index 2befd2504c3dc98626f0cf144d9f914b26a00f52..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,52.861,551786 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.19061,14577.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.168297,14573.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,527.678,5.49846e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219905,14759.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.17852,14765.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.68147,63992.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,155.786,1.62992e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.204639,15171.5 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176396,15171.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,289.384,3.16146e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209138,15283.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.170386,15281.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.57568,45853.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,112.025,1.24576e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.198178,15611.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.149458,15607.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,182.531,2.07462e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208482,15640.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155228,15630.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,183.398,2.07862e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.205967,15678.8 -Add7_f2h,0,0 -Add7_h2f,0,0 
-Relu7,0.155631,15680.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.12333,31353.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.3123,767247 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.188332,15814.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14655,15783.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,118.415,1.38105e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.196828,16023.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.154956,16008 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,118.755,1.40144e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.194117,16244.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149375,16246.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32173,17056.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.546,471887 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.168028,16286.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143801,16277.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,38.6609,468370 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.157347,16316 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133135,16298.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.9068,474441 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.1615,16324.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13646,16316.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.310766,16309.1 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.390116,17764.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.073539,16307.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.066435,16297.5 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.18516,16288 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.056915,16238.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.36499,18581.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_266.txt deleted file mode 100644 index 9aa16c022611cdd686be61a166338c885af0a1ed..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_266.txt +++ 
/dev/null @@ -1,150 +0,0 @@ -Conv1,48.9633,498139 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.188476,14343.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.164991,14324.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,462.396,4.69388e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.215061,14407.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.17005,14405.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.69412,64206.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,141.884,1.45206e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208936,14813 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176863,14807.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,261.73,2.76762e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.197772,14986.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.152431,14982.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.57217,43455.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,109.702,1.18372e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.196226,15375.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.154367,15364.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,171.412,1.91128e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20068,15427.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.152178,15421.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,172.213,1.92151e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.204831,15488.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156274,15475.3 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.13947,30956.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,68.3793,769846 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189381,15641.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.139174,15641.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,119.871,1.38514e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.188025,15875.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146639,15070.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.711,1.39485e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.200629,16125.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.145631,16118.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32165,16120 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.4299,545064 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.168111,16114.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.139743,16091.3 
-Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.1753,538166 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.156773,16061.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132601,16045.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.9512,540783 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.156547,16019.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.133813,16017.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.322718,15224.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.417709,17440.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.081014,16021.5 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0698172,15224.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.197542,15982.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0593022,15952.1 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.34739,17536.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_267.txt deleted file mode 100644 index d819d236d8e802c02e1de724a58a17b19d4d88a7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,50.5904,517863 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.188715,14278.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.169212,14268.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,462.506,4.68297e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.216411,14363.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.174258,14369.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.78763,64820 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,143.038,1.45618e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213804,14752.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176261,14748.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,263.493,2.77383e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209698,14883.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.165583,14887.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.64322,43186.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 
-Conv5,110.266,1.18977e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.202728,15279 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.156834,15265.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,171.977,1.91069e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206021,15347.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155583,15343.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,173.056,1.92271e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.202786,15420 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156931,15406.5 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.09972,30828.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,68.7968,772608 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.188124,15559.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147458,15551.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,120.378,1.38145e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.201964,15816.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146732,15804.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.941,1.40528e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.196776,16032.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148233,16024.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32654,16816.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.7253,548974 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.171974,16053.3 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143167,16045.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.3979,538337 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.157426,15989.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136745,15982.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.2571,537055 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.155308,15955.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.136764,15955.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.320939,15955.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.401793,17374.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0745373,15954 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0677216,15946.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.185097,15938.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.065555,15111.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.30353,18278.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_268.txt deleted file mode 100644 index f83aa5a4709dc52995ef15de36e536773af41e2c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,49.905,511744 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.187695,14326.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.165718,14327.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,462.989,4.69437e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217333,14423 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.174975,14424.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.61147,61400 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,141.977,1.44141e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.221525,14816.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.183052,14824.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,262.246,2.76769e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.207554,15004.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.159272,14997.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.60644,45018.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,109.646,1.19077e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.198764,15389.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.158476,15381.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,171.827,1.9213e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.200399,15435.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15741,15412.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,173.02,1.93411e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.199449,15517.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.152559,15502.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.19238,30218.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,68.6716,767159 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.185068,15624.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.15005,15626 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,120.362,1.39028e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 
-Add9,0.191695,15864 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.151887,15856.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,121.256,1.41644e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.196053,16110.7 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146831,16095.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.3246,16095.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.4277,543855 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.169686,16091.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.142594,16074.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.518,542008 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.158536,16065 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136418,16065 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.2,538987 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.153132,16017.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.134201,16002.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.319528,16002.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.428074,17417.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0757755,15994.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0674875,16002.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.187858,16000.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0532381,15969.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.32115,18330.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_269.txt deleted file mode 100644 index c6c446c551e592d1da97339ecb863280e123ac11..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,49.0628,504810 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.189618,14396.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.170162,14370 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,462.758,4.70641e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217557,14449.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172396,14446.2 
-Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.61854,63635.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,142.229,1.44652e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.209029,14822.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.166812,14811.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,261.43,2.77124e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.20309,15000.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.141414,14987.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.54698,44965.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,109.407,1.19114e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.190463,15387.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.143935,15368.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,171.196,1.91364e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.197474,15456 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.159666,15450.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,171.828,1.91883e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.201657,15532.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.158601,15509.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.06754,31025.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,68.4646,768095 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.186047,15679.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.143929,15671.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,119.875,1.37399e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.193714,15896.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149308,15890.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.764,1.41222e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.193797,16146.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.143548,16146.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.3176,16955.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.5517,549285 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.16548,16142.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.140348,16142.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.1182,540106 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.153814,16093.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.1375,16093.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.1643,540580 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.153097,16069.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.134207,16069.7 
-Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.306737,16060 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.406349,17482.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0792187,16036.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0665277,16044.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.185083,16035 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0555928,16017.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.3666,18389.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp32_perf_fp32_120.txt deleted file mode 100644 index abb7299681b6fb721682e76b110e429fa58b0bff..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar10/vgg16_cifar10_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,154.154,1.54199e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.220064,13668.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.13983,13662.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1317.67,1.13951e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.23238,11673.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.214768,11673.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,8.51273,72467 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,329.448,2.71108e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.217445,12526.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.124732,12532.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,436.005,4.05173e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.226307,13727.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.135171,13737.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,5.7522,61258.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,153.512,1.50259e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.219411,14562.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.169039,14539 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,288.878,3.01868e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.207609,15309.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.11494,15307.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 
-Conv7,267.814,2.9785e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212941,16040.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.163091,16021.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.73827,34424.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,112.878,1.27665e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.208707,16607 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.158825,16599.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,217.151,2.64636e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.21327,17216.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.154083,17212.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,197.886,2.48057e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.218889,18076 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.156406,18069.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,2.04499,35244 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,95.0991,1.20515e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.187971,17730.3 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143526,17715.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,87.9554,1.13323e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.185939,17476 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.141891,17451.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,91.9726,1.16314e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.178909,17143.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.139305,17133.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.401583,17132 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,1.19045,19597.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.133037,17132 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.129558,17124.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.382002,17109.1 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.115561,17099.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.06808,17067.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_120.txt deleted file mode 100644 index 
ecd5f1f273a176bed586d43a30ce54ac3128fc96..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,105.152,1.06919e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.21909,14374.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.177375,14376.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,561.762,5.73341e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.232674,14455.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.193036,14449.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.83179,57022.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,187.585,1.9176e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210258,14954.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.12759,14958.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,308.952,3.36508e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213938,15426.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.185135,15418.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.13165,39204.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,106.808,1.1383e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208075,15658.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.161487,15658.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,185.985,2.10699e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.209723,15859.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.161192,15846.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,186.116,2.12537e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212821,16028.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.172169,16022.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.01809,32051.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,71.5588,819917 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.199356,16227.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.149874,16206.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,129.81,1.55325e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.232808,16487 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.155906,16464 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,130.779,1.57442e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.201823,16709.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.150386,16701.6 -Relu10_f2h,0,0 
-Relu10_h2f,0,0 -Pool4,1.26897,16701.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.4552,811202 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.172434,16504 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.141912,16496.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.4187,805044 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.168658,16281.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.140319,16272.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.0525,792083 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.166613,16093.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.14109,16078.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.362695,16078.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.441015,17518.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0906938,16071 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.075007,16078.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.206635,16027 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0573946,16004.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.45245,29565.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_151.txt deleted file mode 100644 index c526f423205cedfa53089441044ee9bbc63d55b2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,107.388,1.01178e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.217218,13386 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.176124,13391.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,278.896,2.7073e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22901,13702.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176969,13689.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.89328,54801.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,108.141,1.05324e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.217774,14204.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.174108,14210.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 
-Conv4,161.17,1.65817e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.224632,14471 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.180677,14463.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.26071,41930.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,64.1092,645366 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.223637,14708.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.155749,14687.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,97.7646,1.05705e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.211176,14884.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155685,14872.9 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,98.1629,1.06561e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.218981,15025.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154259,15017.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.09586,31586.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.7943,519587 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.187666,15119.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.148316,15111.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.364,1.20515e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.19005,14980.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.153285,14965 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,109.637,1.19884e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.203541,14885 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.153398,14885 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29977,14886.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.3599,486354 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.159106,14765.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.133414,14758 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.5714,458799 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147109,14647.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132844,14639.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.5871,454928 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.144597,14508.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.126316,14506.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.257944,14502.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.343559,15689 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0686942,14510 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.061484,14510 -Relu14_f2h,0,0 -Relu14_h2f,0,0 
-Mul2,0.17542,14502.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.054467,14483.3 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.33096,24739.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_152.txt deleted file mode 100644 index 22b1e98bab229ab157903d6bf9fb60615a218d73..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,107.908,1.02402e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212172,13402.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.186924,13406.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,277.563,2.6994e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.228968,13714.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.173701,13702.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.86616,54839.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,107.244,1.04932e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.22834,14249.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.175698,14247.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,159.887,1.65401e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.22323,14512.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.173961,14520.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.20118,42108.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,63.708,657531 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.213141,14765.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.156402,14765.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,96.9116,1.05306e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20861,14935.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.154582,14937.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,97.7397,1.06611e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.210031,15090.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.155897,15086.5 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.95418,30190.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.6037,530072 -Conv8_f2h,0,0 
-Conv8_h2f,0,0 -Add8,0.185836,15168.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.172722,15168.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.428,1.21203e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.193663,15044.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145718,15037 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,109.937,1.20456e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.196133,14906.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146338,14900.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29171,14885.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.267,474744 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.155106,14799.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.132636,14799.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.6441,447564 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.150284,14691.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.128828,14689.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.4371,444369 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.142124,14594.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128037,14594.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.25515,14592.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.319064,15796.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0651101,14588.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0606424,14586.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.178978,14571.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0551675,14558 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.26003,23364.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_153.txt deleted file mode 100644 index b3b55cf48c5aa2edccc42166c8a667fa9ba0525d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,96.0034,900818 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.211548,13286.4 -Add1_f2h,0,0 -Add1_h2f,0,0 
-Relu1,0.179759,13276.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,267.491,2.58029e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.231195,13563.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.1787,13570.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.94008,53273.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,105.705,1.02375e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218053,14068.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.181458,14066.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,158.017,1.59746e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21758,14317.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.188793,14323.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.11806,37617 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,63.4123,639537 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.200844,14561.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.1607,14553.3 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,96.4935,1.03518e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.203944,14741.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.153634,14743.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,97.3106,1.04844e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.209634,14918.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154399,14914.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.92911,29829.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.6112,527726 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.179531,15000.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.146549,14985.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,109.788,1.20457e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.192236,14862 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.148607,14846.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,110.181,1.19503e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.186904,14719.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149225,14719.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.27559,14721.5 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.1905,471396 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.161407,14649.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.136101,14641.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.5769,443246 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.14295,14546.4 -Add12_f2h,0,0 -Add12_h2f,0,0 
-Relu12,0.132989,14546.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.4194,437376 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.145609,14460.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128675,14452.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.265003,14450.5 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.338891,15639 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0663772,14446.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0625948,14437.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.175772,14443 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0563163,14410.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.46747,27308.1 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_154.txt deleted file mode 100644 index 92ec828264303390e919dffb5b8202e7056a7d2e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,97.3448,902774 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.210172,13227.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.170262,13227.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,268.012,2.57212e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.224734,13479.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.178885,13478.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.01295,54678.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,105.258,1.01806e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218056,14017.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.174386,14024.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,157.903,1.60454e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.221938,14271.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.178588,14269.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.2309,38459.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,63.5376,644852 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205026,14492.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.154486,14489.1 
-Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,96.3055,1.01845e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.210549,14673.3 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15438,14679 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,97.519,1.04651e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.213899,14821.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.148971,14821.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.04105,29658.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.7265,529032 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.183804,14962.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147218,14949.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,110.53,1.20448e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.19061,14805.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.147106,14774.9 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,110.335,1.19139e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.191861,14719.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146937,14719.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29885,14719.5 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.3436,482803 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.158427,14615 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.132246,14615 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,42.107,458106 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.156572,14494.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.129564,14493 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.6751,449618 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.14228,14373.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.131922,14371.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.258302,14366.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.318139,15539.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0647836,14364.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0625307,14364.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.171909,14358.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0550554,14345.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.40385,25968.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_155.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_155.txt deleted file mode 100644 index 189a292990df4a90fcbde996d521074e0caf9aa9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,114.652,1.10512e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.208811,13621.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.170303,13606 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,342.146,3.37742e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.21853,13933.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.170575,13933.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.84349,55767.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,134.622,1.34003e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.212524,14525 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.173682,14523.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,210.24,2.22411e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.208926,14928.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164172,14917.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.1884,41744.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,75.4834,810192 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199346,15154.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.14182,15150.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,124.381,1.38114e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206242,15277.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.154275,15275.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,125.539,1.37689e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.20549,15403.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.155395,15397.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.97444,30801.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,46.9719,515309 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189151,15460.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.146434,15462.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,105.6,1.19399e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.18374,15303.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146457,15303.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,106.068,1.18485e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.181388,15172.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.142831,15174.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.23698,15174.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.3997,498499 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.153365,15051.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.128223,15051.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.414,467223 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.142194,14939.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.124713,14931.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.3199,462608 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.141583,14833.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.125522,14816.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.259893,14812.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.308267,16073.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0629309,14808.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0599613,14808.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.16757,14820.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.053683,14793.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.48006,28157.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_156.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_156.txt deleted file mode 100644 index 04a89e57adad7d4a3efd4d25589a57176222c512..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,114.563,1.10576e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212958,13613.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.181583,13602.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,340.917,3.36151e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.225963,13930 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176498,13937.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.9187,56460.7 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,140.943,1.38834e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216638,14542.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.177244,14528.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,223.057,2.34305e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210335,14963.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.179829,14957.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.11012,41106.1 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,75.249,799106 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.207253,15192.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152751,15173.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,123.141,1.36856e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206875,15321.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.153938,15321.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,124.296,1.38348e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.209314,15443.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.154937,15422.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.07298,30851.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.0923,740815 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.1879,15521.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.148905,15513.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,150.117,1.6834e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.198159,15337.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.15086,15309.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.573,1.66636e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.19173,15167 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.145801,15157.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2818,15916.7 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.7546,484715 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.158777,15050.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.132258,15033.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.5319,456047 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.150447,14926.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.128162,14926.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.3782,449441 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.143823,14824.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.135957,14815.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.269835,14811.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 
-Mul1,0.330065,16106.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0638014,14809.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0603612,14809.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.169862,14801.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0552574,14795.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.42526,27375.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_157.txt deleted file mode 100644 index 3355365dc018744ad1a13d28b4bcbb9ea9aa3544..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,115.17,1.10581e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.214194,13640.5 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.192386,13615.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,352.209,3.47654e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.230485,13933.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.170828,13941.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.84019,55787.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,140.527,1.39151e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213058,14547.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.169804,14540.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,222.489,2.35001e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213909,14961.5 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.166185,14965.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.19234,41113.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,82.8968,887210 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199749,15221.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.149455,15213.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,133.074,1.4816e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20723,15405.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15884,15407.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,134.342,1.49634e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.20782,15601.3 
-Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.155199,15570.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.96713,31145.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.5628,733081 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.194386,15675.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151017,15668.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,149.146,1.69338e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.194975,15453 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.148482,15437.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,149.707,1.67093e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.192037,15269.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.15029,15261.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.28831,16018.5 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,66.8287,746356 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.175512,15126.7 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.142684,15126.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.4599,737366 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.173397,14956.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.142738,14956.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.0923,727416 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.160898,14820.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.134687,14803.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.342158,14803.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.443492,16116 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0908923,14801.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0679643,14801.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.206552,14815 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0579709,14790.3 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50242,28888.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_158.txt deleted file mode 100644 index d519f756fb24c124a78a441beb64ef6b500bda78..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,104.005,982620 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.21652,13347.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.169279,13339.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,334.014,3.21074e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221484,13673.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.170626,13677.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.94483,54335.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,132.86,1.30108e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.214875,14223.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.17502,14211.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,210.266,2.16598e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.214658,14610.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.156284,14608.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.1546,40850.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,75.9882,804125 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.196178,14830.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.140562,14826.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,125.195,1.36587e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.205058,14977.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15524,14962.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,126.555,1.3718e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.209429,15067.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.153865,15062 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.96234,30133.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,48.135,524368 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.190892,15149.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147606,15141.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,108.527,1.20022e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.186191,14991.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146015,14983.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,108.975,1.18932e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.194197,14878.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146274,14871.3 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.25158,14873.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.5028,476529 
-Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.165455,14791.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137708,14789.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.7184,449443 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147759,14685.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133196,14065.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.3824,440233 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.14014,14590.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13109,14582.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.259518,14578.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.320593,15807.1 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0654427,14578.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0630333,14574.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.171532,14561.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0548188,14557.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.35723,25528.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_159.txt deleted file mode 100644 index 84702c96ffc953fd2e1350a3818c1d1dc2ca411c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,104.253,985547 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.207788,13312.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.173077,13310.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,334.204,3.22428e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219388,13655.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.175544,13649.9 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.91091,54652.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,138.704,1.35367e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211772,14223.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176668,14226.9 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,220.793,2.28541e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21269,14654.4 -Add4_f2h,0,0 
-Add4_h2f,0,0 -Relu4,0.175724,14639.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.17622,40976.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,76.6529,798480 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.20397,14842.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.154671,14844.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,126.51,1.37455e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20499,14949.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.158063,14949.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,127.097,1.38155e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.204562,15060.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.17084,15046.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.04432,30093 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.8159,717678 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.185471,15145.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.162956,15131.9 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,148.493,1.63189e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.191887,14987.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149036,14987.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,149.129,1.60367e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198638,14896.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.154646,14888.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29081,14888.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,45.1633,493666 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.161356,14797.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.134595,14780.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.6363,459435 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.138162,14657.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133961,14657.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.8212,456622 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.147074,14577.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.132837,14569.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.271144,14567.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.330119,15834.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0699677,14559.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.065619,14565.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.179292,14559.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0594974,14536.6 -Add15_f2h,0,0 
-Add15_h2f,0,0 -Softmax1,1.41956,26942.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_160.txt deleted file mode 100644 index 7cca3d0807f2f07b371ed2d7ecc09c46cb98814c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,104.742,992497 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.21037,13347.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.16525,13342 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,344.837,3.32325e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220447,13617.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.178066,13608.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.932,52311.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,137.356,1.34175e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.218773,14181.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.170047,14173.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,218.658,2.25012e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.215397,14600.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.171292,14604.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.1774,41599.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,82.2516,853073 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.207183,14859.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.153026,14851.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,133.087,1.44949e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.210549,15069.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155439,15048.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,133.645,1.46219e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.214248,15231.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.158274,15234.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.98313,30477.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,64.8349,714762 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189154,15372.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.154758,15349.8 
-Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,147.444,1.64201e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.197356,15130.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.147513,15130.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,148.167,1.63163e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197666,15023.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.14807,15017.7 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.31627,15017.7 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.0279,734773 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.174793,14903.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.13868,14903.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.4437,728622 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.161017,14773.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.139244,14765.9 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.4334,719978 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.16756,14627 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.135622,14627 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.348855,14625 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.445341,15906.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0952541,14625 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0703513,14625 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.214754,14617.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0575292,14595.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50441,28527.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_161.txt deleted file mode 100644 index 74007f27b84264ab048cdf0559788f01861c4b3d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,118.468,1.14623e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.214315,13651.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.175205,13644 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,374.619,3.66935e+06 -Conv2_f2h,0,0 
-Conv2_h2f,0,0 -Add2,0.225039,13876.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.174059,13869.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,4.88226,55459.3 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,148.591,1.47676e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211617,14452.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172885,14447.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,233.337,2.43933e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.21253,14816.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164812,14829.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.09059,39985 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,82.7071,877005 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199592,15105.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.143519,15099.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,133.588,1.47736e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.2094,15344.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.153999,15336.5 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,134.532,1.49512e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.20918,15536.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.189119,15536.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.03014,31081.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,64.3475,722642 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.190492,15614.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14638,15599.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,145.423,1.64795e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.193577,15447.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.151512,15449.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,146.18,1.63677e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198859,15326.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.145824,15318.5 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2778,16085.1 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.7669,499811 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.162642,15205.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.134566,15175.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,41.8066,474123 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.149816,15052 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.128774,15036.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.5007,466513 -Conv13_f2h,0,0 
-Conv13_h2f,0,0 -Add13,0.145311,14948.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128626,14948.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.266741,14940.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.323131,16235.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0658558,14933 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0638906,14931 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.180242,14921.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0556859,14894.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.45246,27563.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_162.txt deleted file mode 100644 index f7efa4236329b869a83bada18c7d6e0886466beb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,118.741,1.14215e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.211128,13546.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.179547,13562 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,376.67,3.66557e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.221951,13796.7 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.17246,13806.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.12953,56655 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,148.969,1.46177e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213608,14386.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.179263,14395.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,233.791,2.4429e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213327,14786.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.169298,14794.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.28015,42155.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,83.645,880490 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.205535,15033 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.153039,15038.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,134.725,1.48053e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206322,15275.2 
-Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.159394,15269.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,135.171,1.49384e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.208434,15464.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.150521,15454.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.05906,30917.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.57,743440 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189336,15540.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151551,15525.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,150.398,1.69329e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.193999,15353.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.145695,15330.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,151.252,1.6769e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.194469,15178.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148597,15153.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2828,15153.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.8369,485190 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.158233,15065.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.136079,14287.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.8194,454889 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.145096,14950.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.130073,14933.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.3038,449279 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.142847,14845.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.128367,14824.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.266353,14824.5 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.316289,16117.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0665598,14820.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0616826,14820.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.177961,14820.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.058387,14809.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50631,28880.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_163.txt deleted file mode 100644 index 
18b0d55b59930fba7d49e229cf26daf519c17c74..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,118.318,1.12989e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.210421,13493.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.179951,13493.9 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,375.458,3.64614e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220639,13794.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.169705,13792.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.11998,55876.9 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,148.871,1.46612e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21539,14357.9 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.172556,14321.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,234.573,2.43904e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209803,14795.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.165148,14791.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.24031,40255.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,83.4016,881618 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.203858,15051.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.139942,15051.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,134.274,1.47313e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.2066,15250 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.155855,15252 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,134.932,1.48941e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.210235,15437.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156099,15422.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.07714,30856.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.9037,731946 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.197762,15546.3 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.148146,15548.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,149.404,1.67873e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.200399,15316.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.1454,15309 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.301,1.66482e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.200472,15174.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.143446,15176.4 -Relu10_f2h,0,0 
-Relu10_h2f,0,0 -Pool4,1.28163,15178.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,66.7982,737711 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.171948,15025.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.138243,15023.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.5307,732768 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.166908,14868.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.137001,14851.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.4203,725713 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.159138,14708.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.136812,14693.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.339854,14693.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.437914,15990.5 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0918619,14693.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0708121,14699 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.211887,14689.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0560191,14670.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50579,28671.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_164.txt deleted file mode 100644 index 9f999df508b0097dc35d12b781d61a2db0267ccf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,119.105,1.13978e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.211033,13430.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.167749,13438.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,375.932,3.64148e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.229423,13737.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176364,13735.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.13915,54962.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,149.104,1.46648e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.21909,14292.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.170073,14294.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 
-Conv4,234.99,2.43435e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.212175,14702.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.162175,14683.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.36468,44082 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,83.4489,880213 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.219653,14952.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.147356,14942.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,135.007,1.47847e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.209525,14399.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156454,14405.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,135.414,1.47701e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.211666,15344.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.162901,15346.1 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.11945,30699.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,62.6361,693395 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.193426,15437.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.145029,15430.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,143.801,1.61282e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.201596,15272.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.143922,15265.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,144.863,1.60107e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.201522,15151.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.143903,15136.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29956,15136.1 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,66.9644,740178 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.167829,15037.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137228,15027.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.7179,738882 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.162258,14873.9 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136319,14864.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.4668,728791 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.161548,14741.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.134018,14716.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.355752,14716.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.426999,16011.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0889654,14710.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0696798,14712.4 -Relu14_f2h,0,0 
-Relu14_h2f,0,0 -Mul2,0.196501,14698.9 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0560892,14674 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.51014,28632.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_165.txt deleted file mode 100644 index 4adb029c932d180ff157c72449ef5e176dd6318d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,110.403,1.0538e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.206396,13407.9 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172418,13381.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,369.42,3.51865e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.232094,13605.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.184005,13594.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.25174,55789.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,146.687,1.42799e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.215631,14137.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.166502,14139.1 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,232.684,2.38229e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.211449,14547.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.166805,14543.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.31008,42195.3 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,82.879,866005 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.20573,14781.5 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.149058,14781.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,133.915,1.44632e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208213,15000 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.163017,14996.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,134.348,1.46492e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.208552,15225.3 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156473,15217.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.11087,30446.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.5125,719686 
-Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.199349,15307.1 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.148121,15307.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,148.209,1.6465e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.197042,15149.1 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149401,15126.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,148.888,1.63166e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.201605,15025.7 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.150009,15018 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29136,15760 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,43.3923,474948 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.161455,14925.5 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.137384,14910.3 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,40.3975,451244 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.144238,14784.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.133177,14784.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,40.5106,447381 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.146268,14703.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.127609,14693.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.258312,14689.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.337,15969.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0632411,14685.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0661947,14683.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.178165,14674.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0551931,14668.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50292,28539.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_166.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_166.txt deleted file mode 100644 index 743386a5264e4118f7902564f26afdd60c6cb4fd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,110.662,1.03491e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.217679,13276.5 -Add1_f2h,0,0 
-Add1_h2f,0,0 -Relu1,0.17133,13261.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,367.799,3.51004e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.22844,13533.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.181439,13528.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.27467,55958 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,147.144,1.42659e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.226341,14089.7 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.168815,14093.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,233.945,2.39505e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216162,14470.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.164748,14468.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.30686,40888.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,84.0718,870818 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.207064,14727.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.147397,14717.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,134.92,1.457e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206853,14954.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.151842,14941.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,136.018,1.47743e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.217714,15149.5 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.157173,15143.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.12146,29513.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.5401,727501 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.198636,15232.9 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14662,15234.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,149.575,1.65533e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.196645,15065.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146306,15060 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,150.11,1.63542e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197458,14941.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.142492,14939.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32995,15681.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,44.9229,489101 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.163202,14830.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.135376,14820.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,42.0917,462904 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.14365,14704.5 -Add12_f2h,0,0 -Add12_h2f,0,0 
-Relu12,0.129526,14689.3 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,41.892,456969 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.138956,14603.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.124233,14595.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.26276,14593.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.325396,15871.8 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0643294,14582.3 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.061238,14580.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.179935,14565 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0559071,14553.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.4314,26993.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_167.txt deleted file mode 100644 index 0c0ce45f0e9ca21798834a2dcc5d89480c3aa4b8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,109.543,1.03105e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.212667,13238 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172652,13238 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,365.749,3.47448e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.229461,13535.3 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176459,13524 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.30098,55153.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,146.898,1.42171e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.220315,14084 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.17229,14078.3 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,232.114,2.36594e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.230623,14460.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.18078,14464.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.52148,43402.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,84.4898,873339 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208968,14735.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152662,14714.6 -Relu5_f2h,0,0 
-Relu5_h2f,0,0 -Conv6,135.934,1.46153e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.20965,14923.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156329,14910.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,136.37,1.46507e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.21453,15097.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159624,15089.9 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.14539,30952 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,65.7065,720241 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.193589,15194.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.14613,15179.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,148.704,1.63405e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.200008,14987.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.152262,14989.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,149.107,1.62284e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.197483,14903.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149141,14903.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32388,15556.3 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.2184,727451 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.172287,14775.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.150789,14759.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.6351,725542 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.161106,14622.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.134348,14607 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.3955,716563 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.158357,14503.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.133651,14503.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.338183,14502 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.433975,15787.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0982811,14502 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0712315,14500.1 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.206466,14496.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0560892,14477 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.49951,28276.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_168.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_168.txt deleted file mode 100644 index b99d2b30cba81cfa2ce0a9d416db8d0e021c7150..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,109.964,1.02782e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.209144,13222.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.169343,13224.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,368.175,3.47789e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.227381,13499.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.176818,13511.3 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.26897,54774.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,146.055,1.40689e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.221614,14043.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.173208,14049.5 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,232.12,2.35205e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213768,13728.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.173074,13724.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.28884,39738.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,82.6531,845603 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.208508,14704.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.15445,14698.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,134.491,1.42239e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.230827,14908 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.164619,14904.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,134.587,1.43033e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.211865,15074.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.158169,15074.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.08325,30154.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,63.6095,673646 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.191759,15173.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.144873,15173.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,146.355,1.61068e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.1958,15011.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.148201,14254.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,147.031,1.59311e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198533,14895.5 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146693,14890 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.30927,15534.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,67.098,716002 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.170178,14771.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.138127,14771.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,66.6769,723222 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.16181,14638.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.138575,14636.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,66.7153,718120 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.161426,14494.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.137394,14494.7 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.350289,14494.7 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.444647,15757.3 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0872635,14490.7 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0668315,14488.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.202018,14482.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0601662,14465.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.49285,28251.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_261.txt deleted file mode 100644 index 2c67cf74bc5de24304a0e61e2b1a29240f169f77..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,41.6586,423473 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.187628,14233.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.176332,14223.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,358.486,3.66111e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.214203,14409.9 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.186911,14396.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.2347,59034.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 
-Conv3,108.454,1.1053e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207125,14796.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.17886,14798 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,196.633,2.11168e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209125,14958.1 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.179363,14958.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.34564,44910.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,79.9596,867722 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.194216,15261.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.157282,15238.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,130.911,1.45884e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.202597,15359.5 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.162287,15336.7 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,131.525,1.46554e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.205582,15443.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159151,15430.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,1.97105,30845.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,55.995,627529 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.182146,15494.7 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.151023,15494.7 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,95.7772,1.09279e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.186005,15612.5 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149868,15584.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,96.2079,1.10512e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.187922,15707.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.150709,15693.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2862,15695.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,29.1307,337100 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.159001,15768.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.142741,15768.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,26.9254,321724 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.147507,15836.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132421,15824.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,26.7507,320416 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.145679,15874.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.133932,15866.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.265982,15865 -Pool5_f2h,0,0 -Pool5_h2f,0,0 
-Mul1,0.341304,17188.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0655133,15863.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0640187,15868.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.17404,15859.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0565371,15828.5 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.47606,29997.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_262.txt deleted file mode 100644 index f14d74e574c91ea99311d1704ce7509fd59bd68e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,43.1483,438855 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.186162,14160.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.171186,14162.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,356.871,3.63702e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217039,14342.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.172565,14346.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.31939,58857.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,108.839,1.10217e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.200377,14765.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176156,14769.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,196.699,2.10974e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.209192,14919.7 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.172882,14922.1 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.41312,42508 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,79.8326,866160 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.19389,15208.1 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.151925,15208.1 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,130.48,1.4529e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.202415,15350 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.152498,15344.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,131.45,1.4663e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.199487,15397 -Add7_f2h,0,0 
-Add7_h2f,0,0 -Relu7,0.155215,15393.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.06297,30770.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,56.2601,633310 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.177609,15487.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.149679,15468.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,95.6505,1.09524e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.187724,15589.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.143983,15578 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,96.5862,1.10719e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.185352,15675.9 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.146383,15655.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.26552,15655.1 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,29.1971,336277 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.157839,15719.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143161,15709.5 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,27.1253,321619 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.150597,15778.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.135462,15765.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,26.9022,318713 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.150757,15811.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.132566,15811.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.263371,15811.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.34292,17127 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0661213,15803.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0634942,15805.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.17012,15780.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0542846,15763.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50226,30795.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_263.txt deleted file mode 100644 index 247eb7b675a38e9081891c54cacb6c161ed4dbdb..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,50.9234,535554 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.195403,14636.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.175452,14620.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,518.426,5.43526e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.216277,14809.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.17759,14817.1 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.42519,61606.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.413,1.61083e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.220498,15255.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.213352,15243.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,285.114,3.14387e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210978,15424.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.178655,15426.7 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.39062,46304.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,110.742,1.24232e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.200325,15777.7 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.159061,15764.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,179.691,2.06888e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.208322,15842.7 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.159765,15831.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,180.557,2.0787e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.212366,15884.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.160652,15869.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.03828,31748.3 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.7223,767171 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.193103,15989.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.149388,15974 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,116.697,1.36662e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.198892,16141.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.147193,16133.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,117.454,1.39289e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.20044,16298.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148207,16279.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2809,16271.7 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.1437,468696 
-Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.171871,16351.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.145081,16344.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,37.9035,462892 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.154028,16350.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.140879,16350.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.3762,468250 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.160856,16378 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.139458,16370.3 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.325403,16370.3 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.390788,17815.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0718173,16353 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0646715,16345.3 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.179743,16337.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0558013,16312.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.54232,32547.9 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_264.txt deleted file mode 100644 index 878f8d87162aa551eb44d6e2c79c6dccccafefea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,50.0958,528452 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.191519,14650.7 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.168265,14641.5 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,520.446,5.44998e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.219042,14838.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.18717,14832.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.47338,60801.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.435,1.61459e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.207189,15238.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.17886,15240 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,286.302,3.13869e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.210693,15386.6 -Add4_f2h,0,0 
-Add4_h2f,0,0 -Relu4,0.178072,15392.3 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.40118,46257 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,110.89,1.23083e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206536,15762.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.152713,15754.7 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,180.001,2.05725e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.209068,15808.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.156223,15812.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,180.808,2.06412e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.209374,15878.9 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.156092,15882.7 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.06083,31759.7 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.3801,770597 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.187692,15987.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.146994,15979.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,116.85,1.37985e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.195074,16154.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.151641,16149 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,117.299,1.39156e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.199852,16304.1 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149961,16290.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.29325,16290.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.5039,472278 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.175125,16359.3 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.143804,16336.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,38.041,463769 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.153064,16356.5 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.132758,16348.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.424,469832 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.153106,16374.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13542,16359 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.308513,16359 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.359895,17810.2 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0677469,16355.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0626397,16339.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.181554,16337.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0631007,16318.4 -Add15_f2h,0,0 
-Add15_h2f,0,0 -Softmax1,1.49855,31858.5 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_265.txt deleted file mode 100644 index 36826ba6b9c86fa7510621e4962a02433d1be746..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,50.5423,515981 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.198585,14612.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.175669,14613 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,519.793,5.43925e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217084,14836.1 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.179282,14822.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.45173,61576.4 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,153.575,1.61708e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.210207,15253.3 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.175103,15245.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,286.184,3.15231e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.211147,15413.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.175359,15407.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.47327,46233.9 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,110.892,1.22944e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.199208,15754.9 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.157708,15754.9 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,179.987,2.06213e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206335,15835.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.15973,15831.3 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,180.9,2.07261e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.208188,15880.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.167141,15873.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.05679,31752.1 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,66.9345,771288 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189138,15930 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.154322,15931.9 
-Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,116.787,1.36718e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.197935,16112.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.149583,16107 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,117.508,1.38856e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198248,16269.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.153516,16269.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.3271,16267.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,39.253,454324 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.173289,16321.1 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.147554,16305.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,38.1583,449501 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.159545,16317.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.140188,16317.7 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,38.5135,467491 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.158671,16341.7 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.139055,16326.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.331435,16326.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.347447,17051 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0876053,16318.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0679453,16320.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.185029,16308.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0602749,16308.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50535,31811.6 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_266.txt deleted file mode 100644 index 45da23840f0db91dba22812086508d2375272ab5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,48.0955,491356 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.191413,14327.3 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172201,14321.7 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,454.861,4.63547e+06 
-Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.220444,14471.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.179081,14471.7 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.67009,62297.1 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,140.383,1.42932e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.213221,14891.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.183014,14887.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,258.397,2.75077e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.216508,15089.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.176981,15093 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.4418,45302 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,108.651,1.18832e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.206623,15510.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.171596,15516.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,169.658,1.91587e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.210312,15607.1 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.159036,15609 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,170.257,1.91032e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.207119,15681.1 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.159353,15679.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.04852,31350.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.8856,758920 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.195292,15799.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.152092,15801.1 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,119.303,1.38995e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.205631,15950.3 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146229,15954.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.097,1.39083e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.20164,16125.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148373,16120.1 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.32921,16122 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.3915,547948 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.178716,16108.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.139413,16108.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.2391,541629 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.159026,16059.1 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.140956,15241.1 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.4586,542408 
-Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.163135,16044.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.14157,16036.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.342292,16029 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.412724,17430.4 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0718044,16025.1 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0707743,16032.7 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.190146,16007.7 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0562845,15969.7 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.53685,31872.4 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_267.txt deleted file mode 100644 index 1244da47a9f5f51489887a3bb1c97f9274c9ab3f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,48.3476,494655 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.199381,14333.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.17701,14322.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,455.144,4.63328e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217317,14442.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.182137,14450.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.75186,63638.8 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,140.298,1.42313e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.211183,14877.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.175836,14880.7 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,258.572,2.75308e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.213746,15082.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.179867,15087.9 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.54198,45280.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,108.426,1.18182e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.220027,15493.3 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.160268,15487.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,169.827,1.90984e+06 -Conv6_f2h,0,0 
-Conv6_h2f,0,0 -Add6,0.212972,15587.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.163737,15584.1 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,169.68,1.91164e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.214572,15687 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.162475,15671.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.12413,31351.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.8137,771936 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.197829,15793.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.154044,15785.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,119.404,1.38237e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.200329,15955.9 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.151564,15940.7 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.341,1.4096e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.204632,16135.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.158905,16127.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.33683,16945.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.5145,548025 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.177717,16100.9 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.147538,16100.9 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.2055,540629 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.162277,16047.7 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.142898,16040 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,45.2164,540444 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.159416,16009.9 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.140572,16007.9 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.351511,16007.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.403469,17430.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0745818,16000.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0660413,16000.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.183557,15998.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0557373,15965.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50264,31135.3 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_268.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_268.txt 
deleted file mode 100644 index 83fb5487c921a1a851ea3249cbb1ea04376f2f92..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,46.9584,482258 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.188003,14421.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.172437,14408 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,454.234,4.64545e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.218751,14535 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.174255,14537 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.64122,63324.5 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,139.332,1.43474e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.208296,14950.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.178604,14925.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,256.972,2.74482e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.203167,15159 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.165541,15149.5 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.38018,44708.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,108.133,1.17891e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.197692,15569.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.155263,15556 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,168.781,1.91535e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.209381,15658.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.161279,15654.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,169.877,1.91614e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.200197,15749.7 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.162008,14958.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.01499,29922.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,67.6277,768950 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.186021,15854.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.147129,15854.5 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,118.852,1.38048e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.199301,16026.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.146917,16019.1 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,119.584,1.40499e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.200978,16202 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.148972,16203.9 
-Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.2899,16198.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.1641,545492 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.173177,16188.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.138792,16171.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,44.9602,537586 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.15638,16133.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136962,16133.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,44.9592,538474 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.156457,16098.3 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.13454,16090.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.319717,16082.9 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.428202,17501.7 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0808825,16082.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0676285,16082.9 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.195874,16073.3 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.0597115,16055.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.50665,31335 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_269.txt deleted file mode 100644 index b1d756884be10fdeb15b96febf92375c5ffe95aa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,48.5249,496066 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.192949,14310.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.17317,14307.1 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,455.058,4.63206e+06 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.217346,14457.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.182591,14451.5 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,5.5584,60076.6 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,140.254,1.42519e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.212344,14875.1 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.176223,14878.9 -Relu3_f2h,0,0 
-Relu3_h2f,0,0 -Conv4,258.159,2.73933e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.211557,15083.9 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.167519,15081.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,3.49324,44516.7 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,108.704,1.18796e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.202456,15498 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.164021,15486.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,169.675,1.90747e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.206693,15580.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.166293,15572.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,171.573,1.92222e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.205378,15648.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.158453,15648.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.0528,31303.5 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,68.2187,770960 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.189029,15786 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.145011,15780.3 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,119.598,1.39175e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.195483,15934.7 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.147113,15936.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,120.545,1.41227e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.198587,16099.3 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.149212,16085.9 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.34542,18520.9 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,46.2039,541168 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.170264,16085.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.142588,16060.7 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,45.0235,533320 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.16268,16011 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.136076,16011 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,44.8935,533198 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.159618,15988.5 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.138479,15988.5 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.328334,15980.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,0.415604,17399.6 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.0753406,15982.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.0624704,15982.8 
-Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.185115,15950.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.054502,15934.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.49926,31108.7 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp32_perf_fp32_120.txt deleted file mode 100644 index ef02bae6041afdd39151c18c7f8c5dd9634b2602..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_cifar100/vgg16_cifar100_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,150 +0,0 @@ -Conv1,154.121,1.5352e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.214927,13584.1 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.136566,13578.3 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1325.28,1.13532e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.236611,11594.5 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.220182,11607.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,8.68528,74729.2 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,331.497,2.71722e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.216182,12478.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.124275,12482.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,432.035,4.00485e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.186777,13886 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.115949,13886.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,5.62394,57859.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,150.64,1.48113e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.226653,14698.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.171148,14679.5 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,288.325,3.0245e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.205001,15516.9 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.121619,15505.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,264.666,2.98708e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.214553,16309 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.164982,16309 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,2.61044,35073.9 -Pool3_f2h,0,0 -Pool3_h2f,0,0 
-Conv8,112.96,1.29306e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.212323,16783.5 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.155334,16775.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,212.916,2.61474e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.217145,17265.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.15167,17260.3 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,198.796,2.50427e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.21831,17883.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.158639,17875.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,1.99705,35751.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,93.2211,1.15799e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.20279,17562.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.144169,17537.1 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,87.9998,1.12374e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.184723,17274.3 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.155062,17272.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,90.4476,1.13632e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.180918,17000.1 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.142857,17000.1 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,0.403823,16998.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,1.2391,19436.9 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,0.131792,16984.9 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.129337,16976.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,0.301634,16992.5 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.113517,16961.9 -Add15_f2h,0,0 -Add15_h2f,0,0 -Softmax1,1.2847,19459.8 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_120.txt deleted file mode 100644 index 589d3a4ac05f4b6c1ab01fac9f4d2a21357859d9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_120.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,455.534,4.66689e+06 -Conv1_f2h,0,0 
-Conv1_h2f,0,0 -Add1,0.467397,28685.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.353119,28666.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,3585.02,3.23302e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.29772,26928.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.564293,25742 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,18.7601,185486 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,826.856,8.46898e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.449964,29892.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.359794,29870 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1786.12,1.82074e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.468817,29916.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.352812,29890.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,9.09037,107589 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,478.69,5.23312e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.41733,32107.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.244485,32102.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,831.879,9.90367e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.435262,33751.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.356005,33721.4 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,840.699,1.02418e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.452997,34772.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.353574,34718.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,6.35883,93697.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,323.333,4.00937e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.41452,35776.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.295627,35705 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,578.318,7.63711e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.437893,36712.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.327973,36701.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,584.029,7.80435e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.435525,37234.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.338002,37234.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.18449,72631.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,157.441,2.07545e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.419966,37340.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.301151,37340.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,153.651,2.09778e+06 
-Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.388005,37251.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.296562,37240.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,153.779,2.0978e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.392287,37089.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.295282,37094.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.64765,38909 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.2883,224476 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.52821,62175 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.284005,36871.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.55282,59052 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.598219,40063.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.268076,36932.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.37863,41627.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.353387,38523.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.025,504571 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_151.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_151.txt deleted file mode 100644 index 2971ceeea6c8dd895551ec1ad644e950a2c5bf0d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_151.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,422.717,4.83781e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.456913,31601.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.359327,31608.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1351.01,1.46175e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.470782,30563.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.368472,30559.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.228,130152 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,626.09,6.72717e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.451845,31359.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.295218,31306.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,648.454,7.58183e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.436287,33020.2 -Add4_f2h,0,0 -Add4_h2f,0,0 
-Relu4,0.363244,33016.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.28285,102264 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,311.013,3.59628e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.434258,33894.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.335039,33833.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,428.34,5.33657e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.439071,34778.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.370583,34793.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,423.839,5.3622e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.43703,35548.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.364132,35537.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.63221,78037.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,212.129,2.72828e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.42188,36041 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.347705,36018.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,332.695,4.41954e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.431877,36682.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.34686,36667.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,332.14,4.46479e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.430725,37207.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.346392,37157.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.78426,74326.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,97.827,1.3133e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.388683,37420.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.306886,37344.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,92.2453,1.28561e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.354264,37397.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.297324,37378.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,92.9713,1.25568e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.34988,37401.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.289862,37405.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.47196,37393.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.902,217429 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.44447,61995.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.202687,37222.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.34326,58863.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.417669,40296.4 -Add15_f2h,0,0 
-Add15_h2f,0,0 -Relu15,0.129376,37226.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,0.987714,41852.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.258476,38778.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.5777,499794 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_152.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_152.txt deleted file mode 100644 index 06209b398390083a7e26f5fdb4aeeda3cb209f85..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_152.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,419.903,4.80075e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.450078,31672 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.351653,31680.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1354.26,1.46644e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.473668,30639.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.374443,30632 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,10.7878,128710 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,629.827,6.75962e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.454795,31321.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.272812,31314.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,644.683,7.58624e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.443385,32989.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.358603,33001 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,8.00249,107248 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,308.4,3.61338e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.43619,33863.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.367026,33841.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,423.632,5.3215e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.435115,34743.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.367647,34759 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,421.208,5.34379e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.434041,35567 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.373271,35582.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.04297,72888.8 
-Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,212.04,2.72156e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.417727,36076.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.337746,36061 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,330.815,4.41945e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.430757,36785.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.336632,36759 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,330.747,4.48595e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.42286,37325.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.342738,37287.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.48344,72664.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,97.3527,1.30997e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.392498,37432.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.301055,37432.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,93.0202,1.29313e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.354111,37462.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.294315,37436.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,92.5392,1.28842e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.349381,37493.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.306501,37485.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.38795,37474.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.395,220404 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.46958,61965 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.184447,37284.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.32265,58890.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.401323,40369.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.122387,37307.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.00145,43413.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.123065,37311 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.3043,513809 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_153.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_153.txt deleted file mode 100644 index 1643c576ca002cc4b8cc09c4a06b97c91f35457c..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_153.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,397.099,4.49506e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.464709,31356.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.343282,31356.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1336.38,1.42703e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.470538,30228.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.378373,30205.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,10.9964,128980 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,626.936,6.64333e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.448285,30804.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.284934,30789 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,637.781,7.37639e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.444574,32500.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.362661,32508 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,8.10626,105809 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,302.937,3.50525e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.428292,33395.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.346002,33407.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,424.344,5.24194e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.434564,34333 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.35514,34283.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,417.409,5.23588e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.451583,35186 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.369868,35194.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.50473,80917.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,209.276,2.65827e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.414558,35686.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.341151,35683 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,329.01,4.37324e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.423173,36457.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.338411,36431 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,329.79,4.42685e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.436888,36984.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.340741,36969.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.87922,73954.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,97.0445,1.29492e+06 
-Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.391,37108.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.304063,37081.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,92.307,1.27898e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.350232,37150.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.299244,37104.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,92.5061,1.27908e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.339915,37192.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.301516,37135.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.49239,37135.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.0233,216437 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.44092,61606 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.191008,36964.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.32535,58478.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.399518,40008.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.130444,36984 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,0.999721,41606.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.237637,37084.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.9984,502847 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_154.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_154.txt deleted file mode 100644 index 8cbd143a8a8e2064f740d51b93696e84268031dd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_154.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,395.41,4.48572e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.459621,31490.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.364497,31468.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1328.47,1.42515e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.468785,30320.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.380856,30327.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,10.8877,127334 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,610.254,6.4922e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.458974,31088 -Add3_f2h,0,0 
-Add3_h2f,0,0 -Relu3,0.279231,31080.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,634.81,7.41724e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.448696,32754.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.35749,32750.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,8.46483,113000 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,302.409,3.51463e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.433527,33631.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.345092,33616.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,421.615,5.24439e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.431966,34539.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.360196,34555.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,415.836,5.24529e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.437291,35324.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.373394,35309.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.32959,75931.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,209.433,2.67102e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.42028,35870 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.337439,35847.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,328.216,4.36791e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.427327,36583.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.348831,36576.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,328.848,4.39311e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.437989,37115.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.350975,37100 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.72423,74230.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,96.8875,1.30091e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.394866,37311.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.309835,37285.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,91.7807,1.28302e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.347807,37292.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.293189,37281.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,92.4581,1.28597e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.346776,37350 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.293771,37323.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.45509,37304.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.3207,217244 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.50188,61790.4 
-Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.221772,37118.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.40265,58712.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.506123,40192.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.134297,37152.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,0.977603,41760 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.241554,38732 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.1237,504354 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_155.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_155.txt deleted file mode 100644 index 918a2cfce4ae2cdbe2512f329d6bc7d247e48b7c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_155.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,462.746,5.246e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.45827,31208 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.363135,31219.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1795.36,1.85564e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.500798,29407.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.361573,29400.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.464,135697 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,657.109,6.98681e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.442558,31339.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.285369,31297.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,814.728,9.52013e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.446494,33129.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.360696,33103.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.6773,104305 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,357.389,4.22886e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.430322,34345.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.25004,34334.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,525.941,6.72597e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.444279,35538.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.373759,35527 -Relu6_f2h,0,0 -Relu6_h2f,0,0 
-Conv7,535.496,6.91661e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.457022,36269 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.364248,36254 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.64234,79736.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,226.052,2.92868e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.425669,36883.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.351659,36868.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,357.102,4.87983e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.429515,37632.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.347782,37610.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,357.738,4.92851e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.435691,38087.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.342629,38060.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.96874,74207.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,102.78,1.33726e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.406462,38231 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.317036,38219.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,99.1507,1.39934e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.367941,38262 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.301356,38246.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,99.6024,1.40954e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.364204,38247.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.297618,38232.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.44887,38244 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.5915,219518 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.50539,63628.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.263711,38024 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.47619,60419.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.46332,41238.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.137439,38072.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.01981,42813.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.266642,39639.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.0942,522733 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_156.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_156.txt deleted file mode 100644 index fba1d66b52595c5c4daed3631f4937073a8d1713..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_156.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,462.456,5.24759e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.470366,31236.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.386437,31233.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1786.08,1.85547e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.49011,29461.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.370277,29415.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.5777,141552 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,663.094,7.01318e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.437688,31327.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.275621,31346.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,823.458,9.69014e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.449541,33274.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.357054,33236.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.39743,99651.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,356.259,4.23242e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.423742,34467.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.252715,34452 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,521.528,6.72026e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.432165,35629.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.353637,35636.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,534.428,6.93684e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.434514,36372 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.349861,36364.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.33523,76385 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,264.163,3.45833e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.422949,37001.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.344703,37016.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,454.981,6.23324e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.432491,37652.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.341759,37618.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,449.836,6.22413e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.429278,38118.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.338193,38042 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.72389,74164 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,103.435,1.40574e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.407595,38286 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.311096,38241.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,98.8123,1.3937e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.364689,38296.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.299948,38281.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,98.9485,1.39474e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.362276,38295.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.305662,38280.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.43221,38258 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.6626,224054 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.4331,63837.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.27479,38030 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.47038,60583.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.449291,41256.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.136051,38082.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.031,42885 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.248115,39714.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.754,518805 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_157.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_157.txt deleted file mode 100644 index b282e13d3160a6dbc9ff539af043a14042c6591d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_157.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,468.293,5.32815e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.459716,31350.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.361305,31347 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1806.4,1.8695e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 
-Add2,0.484337,29411.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.36887,29407.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.5641,141800 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,659.204,7.01771e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.439812,31373.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.290232,31335.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,840.833,9.84169e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.452702,33122.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.358027,33099.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,9.36531,122385 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,357.921,4.22258e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.43866,34342.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.256524,34323.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,534.371,6.81454e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.443589,35461.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.37228,35465 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,542.12,6.99337e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.440734,36303.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.366692,36280.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.83277,81524.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,262.187,3.38728e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.423952,36929.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.352555,36906 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,451.982,6.17926e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.450955,37599.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.359506,37557 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,450.047,6.21311e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.429361,38157.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.346597,38058 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.9487,74195.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,157.702,2.17e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.404139,38346.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.313132,38289.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,152.988,2.17418e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.376485,38459.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.30309,38429.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,153.741,2.19347e+06 -Conv13_f2h,0,0 
-Conv13_h2f,0,0 -Add13,0.374002,38562.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.297509,38559 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.52252,42397.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,20.9558,246174 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.50233,63426.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.266917,38277.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.56151,60267 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.508606,41435 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.152332,38311.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.08275,44550.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.201663,38348.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.6795,520432 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_158.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_158.txt deleted file mode 100644 index 1d3d7298488d10c4b14b5adf33424f91d23eddad..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_158.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,440.391,4.93952e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.457003,30781 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.393682,30762 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1768.36,1.80247e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.473995,28941 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.368388,28918.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.2497,134357 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,637.755,6.69794e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.437727,30922.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.274207,30906.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,807.055,9.28151e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.452458,32744 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.353163,32717.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.66591,104685 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,348.993,4.06804e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.432177,33951.8 
-Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.264075,33940.4 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,516.937,6.57625e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.438206,35229.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.354039,35203.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,527.592,6.77733e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.445438,36025.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.363307,36010.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.53962,82711.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,223.042,2.87593e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.434948,36631 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.340484,36638.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,354.42,4.80514e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.433405,37407.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.343896,37408 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,355.301,4.87592e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.431262,37947.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.359659,37924.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.53255,68235.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,103.145,1.4059e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.396575,38067 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.320997,38049.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,97.3921,1.38125e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.364581,38127.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.291557,38102 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,98.0876,1.38361e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.37388,38184.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.300728,38139.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.45106,40011.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.5523,230987 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.50426,63542 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.26862,37919.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.47382,60366.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.445022,41136.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.128671,37939.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.03307,42710.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.304607,39548.8 -Add16_f2h,0,0 -Add16_h2f,0,0 
-Softmax1,34.6686,514672 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_159.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_159.txt deleted file mode 100644 index 6008d5f448fea1da8a972d0261a0f86c061f4297..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_159.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,442.356,4.97196e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.462584,30844.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.370206,30817.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1768.67,1.8098e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.475421,28921.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.360697,28918.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,14.2141,156890 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,645.945,6.74373e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.444785,30872.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.260088,30830.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,813.457,9.41369e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.445118,32775.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.369771,32787.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,8.4375,109650 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,348.398,4.06891e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.430449,33993.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.264927,33977.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,517.785,6.58243e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.440625,35167.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.365381,35167.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,524.29,6.73461e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.442187,36030 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.361554,36018.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.41749,75571 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,259.883,3.36736e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.422353,36693 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.349579,36677.8 -Relu8_f2h,0,0 
-Relu8_h2f,0,0 -Conv9,448.786,6.09741e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.426001,37333.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.34275,37318.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,443.92,6.11344e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.438495,37857.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.35484,37851.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.82873,75683.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,101.987,1.38326e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.404247,38031.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.308447,38020.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,98.3819,1.38671e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.363224,38076.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.296389,38061.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,98.0606,1.38627e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.360787,38114.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.309348,38054.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.47454,38027.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.686,221396 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.53037,63461 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.271225,37853 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.46261,60279 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.453169,41083.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.129932,37932.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.00204,42720 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.265541,39542.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.0496,515967 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_160.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_160.txt deleted file mode 100644 index cf13d7359db25c20ce331a4114a8945cdad1872a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_160.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,439.53,4.95415e+06 -Conv1_f2h,0,0 
-Conv1_h2f,0,0 -Add1,0.470738,30974.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.357822,30978.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1775.42,1.8209e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.461854,29032 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.355947,29020.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.3663,136579 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,649.944,6.83858e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.52767,30841.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.340959,30845.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,816.027,9.38261e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.45667,32764.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.379954,32756.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.26986,94885.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,351.701,4.11514e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.413343,34040.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.251686,34032.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,521.937,6.63458e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.442014,35203 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.36243,35210.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,530.806,6.8045e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.436114,36034 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.377022,35999.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.34536,82602.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,258.532,3.34657e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.429067,36681.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.346635,36655 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,443.115,5.97557e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.438622,37390.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.346532,37402.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,444.201,6.10648e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.432062,37943.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.343135,37870.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.93087,77591 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,156.695,2.14927e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.406808,38181.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.315108,38185.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,152.801,2.15966e+06 -Conv12_f2h,0,0 
-Conv12_h2f,0,0 -Add12,0.369144,38314.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.295499,38310.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,152.641,2.16467e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.372076,38386 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.292274,38325.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.4523,38329 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.5014,230710 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.51477,63950.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.305273,38109 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.46625,59128.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.578813,41342.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.152736,38150 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.034,42990.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.241906,38183 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.7851,517607 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_161.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_161.txt deleted file mode 100644 index 233279f0829fbf4e776cb877d053dc49184c9971..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_161.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,478.02,5.46943e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.465918,31498.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,1.0835,37990 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2012.03,2.06938e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.467992,28891.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.371358,28918.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,10.9606,124367 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,658.983,6.96979e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.453086,31441.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.310206,31441.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,897.926,1.04895e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.446583,33380.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.361222,33392 
-Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.61052,105274 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,381.524,4.56242e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.421905,34695.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.272114,34667.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,582.714,7.53659e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.436203,35850.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.391563,35839.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,589.591,7.67717e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.430814,36612.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.354577,36589.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.17412,76754.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,269.766,3.53152e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.431499,37341 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.341713,37329.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,459.852,6.34445e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.43468,38053 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.344503,38045.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,457.161,6.36878e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.428471,38513 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.342924,38482.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.06549,76943.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,158.162,2.19765e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.401719,38764.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.308338,38737.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,153.356,2.19658e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.374117,38775 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.308625,38775 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,153.861,2.19994e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.375191,38816.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.307033,38805.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.61365,40707.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.921,229242 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.52435,64934 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.284293,38565.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.55004,61680.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.469035,41867.6 -Add15_f2h,0,0 -Add15_h2f,0,0 
-Relu15,0.152223,38580.8 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.1082,43465.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.25902,40228.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.3303,523513 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_162.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_162.txt deleted file mode 100644 index 48c3d43c77cb5a825c84d99ee0e66c50ded6cf45..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_162.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,475.325,5.4428e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.471466,31519.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.3742,31462.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2020.22,2.0755e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.508951,28887.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.358884,28857.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.2409,124215 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,656.323,7.02688e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.44049,31453.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.299179,31411.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,892.972,1.04325e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.451563,33487.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.363717,33460.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,6.78994,95413.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,381.923,4.57262e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.423358,34762.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.239269,34735.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,577.293,7.4957e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.445502,35934.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.365464,35915.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,593.824,7.74848e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.451172,36608.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.355052,36627.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.24424,78714.2 -Pool3_f2h,0,0 
-Pool3_h2f,0,0 -Conv8,268.715,3.53072e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.436618,37359.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.336767,37344.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,455.751,6.29107e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.434417,38081.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.349431,38073.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,466.098,6.50048e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.443306,38491.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.344352,38423.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.05167,78744.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,157.831,2.18693e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.412754,38725.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.305323,38664.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,154.577,2.20612e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.381516,38732.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.30065,38705.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,153.16,2.19708e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.375505,38816.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.30179,38805 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.56629,44533 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.5104,234215 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.4864,64875.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.288939,38557.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.54192,61492.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.520995,41833.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.17054,38584.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.08611,43484.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.243653,38599.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.679,521041 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_163.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_163.txt deleted file mode 100644 index 28c204df0fc5274e49e7cb1ffd7818470b709b15..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_163.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,479.238,5.47122e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.472881,31532 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.370654,31509.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2009.93,2.071e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.475536,28956.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.372708,28906.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.867,128952 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,655.007,7.00299e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.455499,29912.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.306564,29904.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,891.666,1.04793e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.450814,33510.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.359422,33479.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.20779,98681.2 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,383.212,4.59584e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.426141,34769.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.252728,34731.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,581.782,7.53428e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.445591,35919.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.357694,35938.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,594.196,7.74715e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.440305,36627.2 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.367193,36619.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.3188,78652.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,267.855,3.51516e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.435704,37363.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.352267,37302.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,463.867,6.40406e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.437777,37971.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.349809,37964.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,464.395,6.47535e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.432613,38369 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.346564,38376.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.87553,74836.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 
-Conv11,161.566,2.2398e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.412286,38661.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.310443,38623.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,156.559,2.23724e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.383142,38736.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.310756,38721.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,156.895,2.24982e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.389003,38851.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.309067,38789.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.54096,40718.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.4789,233751 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.53203,64756 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.274219,38557.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.58088,61451.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.516374,41810.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.200409,38580.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.14305,43454.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.266034,40205.4 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.769,521154 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_164.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_164.txt deleted file mode 100644 index ba32130b28fe4be08803034683f3371ea182602e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_164.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,477.833,5.45557e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.469412,31456.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.36227,31464.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2015.78,2.06643e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.47699,28845.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.372528,28838.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.1428,121236 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,652.718,7.00219e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,0.456913,31419.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.311314,31381.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,900.165,1.05334e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.445285,33365.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.354392,33323.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,8.12858,110012 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,380.239,4.53905e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.428068,34643.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.24837,34635.8 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,587.874,7.54955e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.441552,35736.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.36629,35732.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,587.373,7.61664e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.442706,36554 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.35916,36535 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.84492,85826.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,267.762,3.51378e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.433412,37242 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.351883,37253.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,459.17,6.29912e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.438225,37946.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.353125,37939.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,459.939,6.39416e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.432228,38460.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.360254,38442.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.37304,76782.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,159.158,2.20692e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.417258,38618.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.322084,38588.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,158.785,2.26098e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.38058,38774.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.303506,38733 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,156.775,2.24646e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.38451,38782.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.314642,38752.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.54075,38764 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.7869,243173 -Mul1_f2h,0,0 
-Mul1_h2f,0,0 -Add14,2.41893,64810.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.266853,38532 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.55642,61472 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.51491,41799.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.189989,38531.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.10684,43410.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.262994,40183 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,35.1529,527155 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_165.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_165.txt deleted file mode 100644 index c59e479d329481849d8d3dac85ceab6dacb695fd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_165.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,455.03,5.1799e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.474071,31222 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.357803,31222 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1993.53,2.02599e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.490026,28544.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.389419,28517.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.0146,129931 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,635.387,6.69338e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.438711,31119.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.293996,31131.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,878.929,1.02748e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.453002,33173.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.365246,33177.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.53228,101166 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,372.759,4.41161e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.422795,34486 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.256888,34444.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,575.548,7.3956e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.439851,35651.8 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.364133,35613.2 
-Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,581.808,7.49618e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.443313,36417.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.357905,36413.8 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.62854,85542.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,264.059,3.44035e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.440401,37144.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.348472,37117.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,450.917,6.16922e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.433373,37960.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.355365,37911.2 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,457.997,6.36621e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.432715,38355.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.349317,38348.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.7753,74725.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,157.693,2.18022e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.41155,38563.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.322738,38552.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,152.831,2.1846e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.374059,38631.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.302943,38619.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,153.721,2.20276e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.372421,38657.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.299263,38642.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.55718,42488.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.8352,238008 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.50222,64212.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.273887,38394.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.5249,61001.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.458295,41641 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.154111,38444.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.06721,43254.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.23811,38467.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.8366,521695 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_166.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_166.txt deleted file mode 100644 index c64f4f3e52e6fa04b95b4612aa5c3bcb4a4c3ed2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_166.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,455.276,5.15856e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.470359,31149.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.386129,31145.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1996.3,2.01844e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.480478,28456.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.364843,28445 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,13.1877,143849 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,638.453,6.70373e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.431281,31082 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.261586,31043.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,880.677,1.02525e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.45194,33154 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.348607,33101 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.77753,104127 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,373.494,4.4186e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.434212,34390.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.239826,34384.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,572.542,7.35019e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.445362,35601.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.361758,35590 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,587.187,7.55765e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.432727,36379.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.345611,36349.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.38383,78143.2 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,265.481,3.44615e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.43203,37082 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.340134,37047.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,457.768,6.28078e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.433913,37802.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.337374,37742.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,461.356,6.38625e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.448081,38215.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.334123,38165.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.02037,76324.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,157.571,2.17779e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.412523,38442.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.308018,38415.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,151.67,2.1545e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.382015,38556.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.297944,38510.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,152.985,2.18104e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.364958,38608.6 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.292722,38593.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.54659,42345.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.0312,230236 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,3.12421,72351.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.266975,38342.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.50593,61281.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.462366,41629.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.160268,38415.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.0812,43265.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.265125,40039.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.389,510390 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_167.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_167.txt deleted file mode 100644 index 54673f6021e799da059454b88b7fa00f4037aa37..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_167.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,451.842,5.16033e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.452958,31297.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.357886,31285.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1989.47,2.03189e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 
-Add2,0.480286,28587 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.363979,28587 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.3533,134286 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,637.258,6.74518e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.456464,31197.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.29685,31158.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,868.231,1.01561e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.513251,33295.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.366276,33295.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,6.86558,94687.4 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,373.103,4.43476e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.435043,34535 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.254092,34509.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,568.595,7.33462e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.442815,35774.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.367025,35762.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,581.724,7.54411e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.44195,36516.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.376818,36459.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.18811,76563.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,263.492,3.43995e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.437937,37228.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.350263,37225.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,456.802,6.28936e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.445771,37918.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.348363,37907.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,455.376,6.28292e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.437924,38408.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.347474,38374.6 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.87436,78635.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,160.082,2.21344e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.412855,38662.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.320402,38616.6 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,153.655,2.19322e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.37941,38745.6 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.300702,38715.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,157.898,2.25175e+06 -Conv13_f2h,0,0 
-Conv13_h2f,0,0 -Add13,0.397201,38870.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.304267,38817 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.56307,42609.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.2303,233792 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.48179,65167.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.267499,38554.4 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.50431,60194.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.645949,43541.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.222649,38638.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.18469,43603.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.268466,40305.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.5977,523272 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_168.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_168.txt deleted file mode 100644 index 89db826f20bbc72ed4380b8ba9dbbe7525a4e4a8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_perf_fp16_168.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,455.361,5.15884e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.462295,31145.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.348824,31137.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1995.56,2.02344e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.491447,28479.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.384536,28460.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,12.3928,132727 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,636.833,6.72095e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.445655,31089.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.272409,31051.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,879.67,1.02163e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.445156,33127.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.359966,33135.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.38661,102607 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,372.157,4.40128e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 
-Add5,0.429316,34394.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.24878,34356.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,572.823,7.33369e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.44659,35587 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.366212,35563.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,587.311,7.54241e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.439447,36326.6 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.355396,36288.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.36879,76206.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,263.153,3.41952e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.426865,37082.6 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.345381,37032.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,455.644,6.22858e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.428452,37785.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.345034,37743.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,461.078,6.37041e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.441771,38219 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.34899,38181.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.02953,76389.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,159.459,2.18848e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.419249,38479.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.302764,38465 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,153.928,2.19033e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.390789,38585.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.295538,38528 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,155.492,2.2155e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.381342,38626 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.306968,38599.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.53701,40509.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.9248,227446 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.4997,64466 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.274335,38386.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.54606,61207.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.516683,41628.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.227859,38394.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.15581,43260.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.258853,40068.8 -Add16_f2h,0,0 
-Add16_h2f,0,0 -Softmax1,34.6115,519004 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_261.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_261.txt deleted file mode 100644 index e4548f7e02c2297a343cf2900af977e1c3ae1ccb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_261.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,158.533,1.70071e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.608726,29506.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.700086,29513.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1612.88,1.72645e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.440343,30937.8 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.360055,30937.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,48.8482,521160 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,617.178,6.62728e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.392593,31410 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.261708,31364 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,838.687,9.64846e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.405412,33103.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.32588,33092.4 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.15102,94244 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,312.613,3.66479e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.400798,33924.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.366008,33901.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,561.898,6.91021e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.40202,34422.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.352472,34388.2 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,566.636,7.01164e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.41356,34754.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.360101,34720 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.63076,78106 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,222.176,2.74714e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.404613,35239.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.338552,35227.8 
-Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,370.64,4.79864e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.399108,35681.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.356363,35654.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,375.592,4.86329e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.403716,35914.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.348452,35899.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.77606,68143 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,107.212,1.38908e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.375371,35974.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.30003,35928.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,104.675,1.38702e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.347679,35823.2 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.290034,35804 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,103.943,1.3766e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.344331,35724.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.308018,35697.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.36247,35674.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.6724,208084 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.51761,59723.4 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.251244,35503 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.44899,56794.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.496343,38581 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.122719,35568.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,0.966106,40148.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.239468,35621.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.9406,481371 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_262.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_262.txt deleted file mode 100644 index 10e7503edfe46bb07fa6346a48480c1fdd62264b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_262.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,161.775,1.71091e+06 
-Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.646858,29303.2 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.70038,29269 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,1616.13,1.72554e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.444843,30853 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.378718,30849.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,43.8828,470734 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,622.413,6.66894e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.4006,31322.2 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.240914,31291.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,840.715,9.67157e+06 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.409157,32995.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.324543,32965.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.28431,95528.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,313.057,3.66414e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.407569,32159.2 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.347211,32132.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,565.199,6.98615e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.412817,34278 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.376728,34278 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,571.402,7.08326e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.415205,34648 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.359423,34609.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.42833,77877.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,223.315,2.77603e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.402314,35079.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.338814,35048.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,374.653,4.82776e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.403198,35528.6 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.34396,35471.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,376.729,4.8737e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.410859,35773.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.336914,35770.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.95248,73240.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,108.964,1.40477e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.385227,35845.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.302072,35815.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,104.682,1.38706e+06 
-Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.354814,35643.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.29758,35620.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,105.504,1.38483e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.347557,35540.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.292427,35513.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.37679,37298.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.9789,220505 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.42912,59088.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.255692,35347 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.37765,56103 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.405227,38306.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.124992,35373.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.00088,39794.4 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.245631,36880.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.8187,480667 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_263.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_263.txt deleted file mode 100644 index b7b8bf2f6395e09572921abd7b0d7af36b1fd02b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_263.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,191.244,2.05055e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.427005,29448.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.405195,29437 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2487.73,2.6007e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.453194,31370.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.363793,31294.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,18.1172,201366 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,828.895,9.10771e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.399441,32356.4 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.239199,32322.6 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1237,1.45583e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.426282,34091.6 -Add4_f2h,0,0 -Add4_h2f,0,0 
-Relu4,0.359966,34068.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.02861,98706.8 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,417.602,5.03829e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.384861,35003.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.244683,35000 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,771.959,9.86912e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.41635,35389.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.364319,35370 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,793.706,1.00655e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.41605,35386.8 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.35642,35379 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.2233,76126 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,288.912,3.68753e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.425668,35922.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.336254,35900.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,509.037,6.70579e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.416388,36378.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.337548,36369 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,511.52,6.73398e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.42227,36633.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.346916,36595 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.88723,73228 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,142.071,1.87156e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.390155,36636.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.299205,36609.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,141.007,1.89372e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.373725,36482.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.978996,43770.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,140.041,1.87996e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.363351,36285.4 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.313297,36258.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.47037,41611.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.1496,218005 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.76381,64189.2 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.273976,36041.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.61064,57895.6 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.561251,39172 -Add15_f2h,0,0 
-Add15_h2f,0,0 -Relu15,0.161829,36113 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.09352,40776.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.272631,37718.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.759,490105 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_264.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_264.txt deleted file mode 100644 index eba44020106d7110831698fe6797c3e08759b3c4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_264.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,185.939,2.01791e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.449643,30210.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.343512,30229.4 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2501.14,2.62453e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.451575,31454.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.368913,31385.8 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,17.6366,203769 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,840.18,9.23985e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.392555,32314 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.230943,32298.8 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1241.48,1.46389e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.422672,34077.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.344183,34043 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,6.79683,93570.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,419.176,5.00392e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.386577,35025.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.247319,34995 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,774.162,9.89205e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.427769,35412 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.368358,35370 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,797.068,1.00928e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.419845,35440 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.354776,35386.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,4.93236,70696.2 
-Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,288.804,3.67853e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.409738,35955.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.348677,35956.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,508.633,6.69683e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.408344,36369.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.33811,36342.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,512.264,6.77793e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.409164,36564.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.348613,36515 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.76312,73057.8 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,142.653,1.88521e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.388529,36604.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.305624,36597 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,141.645,1.91591e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.364497,36482.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.297535,36470.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,140.293,1.88476e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.363978,36259.2 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.309911,36259.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.44807,39896.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.5247,220418 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.52067,60871 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.276351,36098.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.46145,56290 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.552175,39172.2 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.147897,36112.2 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.08342,40749.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.267576,37671.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.5875,488365 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_265.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_265.txt deleted file mode 100644 index db65e012b6c594f9919d33e8de8ba1edbe30485f..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_265.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,181.593,1.97488e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.435338,30282.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.362987,30290 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2487.56,2.62436e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.451518,31514.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.359396,31522.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,16.0458,183438 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,839.503,9.23851e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.406814,32390.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.239935,32379.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1231.36,1.45609e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.41331,34183.2 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.342853,34141 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,6.27933,90448.6 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,418.496,5.08746e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.379108,35090.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.259602,35079.2 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,771.965,9.87368e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.412676,35435.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.359735,35438.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,792.082,1.00742e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.418122,35520 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.345508,35493.4 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,4.98809,76145 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,288.537,3.68433e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.412907,36023 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.334289,35993.8 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,507.543,6.69858e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.412267,34598.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.356229,34590.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,511.552,6.75649e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.404273,36646.4 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.328203,36619.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.74345,73209.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 
-Conv11,141.885,1.87443e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.382647,36661 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.30273,36634.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,141.71,1.88134e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.370231,36525.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.291647,36506.8 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,139.32,1.87431e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.369893,36344.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.297732,36344.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.35279,36314.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,18.1132,230015 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.70198,62321.6 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.268184,36131.6 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.56545,57748.8 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.531895,39215.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.159673,36175.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.09838,40793.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.305682,37742.8 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.577,493091 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_266.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_266.txt deleted file mode 100644 index 2e8956e67f1a0a2f3eb8cb56b93c442a477407e5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_266.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,173.636,1.88423e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.382109,29883.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.23427,29876 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2203.56,2.29131e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.443966,30586.2 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.357886,30563.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.8112,129441 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,808.594,8.60936e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 
-Add3,0.406653,31346.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.229196,31293.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1103.71,1.26443e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.39701,33358.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.263282,33339.2 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.49989,104971 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,385.211,4.5293e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.371255,34566.4 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.239333,34562.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,720.44,9.09088e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.433539,35193.2 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.353361,35208.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,733.435,9.28106e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.409885,35524.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.363396,35448 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.01246,74445.8 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,279.884,3.55576e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.407653,36195.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.350302,36172.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,492.124,6.51622e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.410564,36792 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.338763,36730.8 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,489.535,6.56928e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.406942,37234.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.340312,37223.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.98477,72556.2 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,140.565,1.86739e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.393834,37238.6 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.301565,37207.8 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,134.25,1.83479e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.373726,37082 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.298366,37074.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,137.186,1.85803e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.368363,36891.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.304748,36876.6 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.31005,36876.6 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.7354,228173 -Mul1_f2h,0,0 
-Mul1_h2f,0,0 -Add14,2.44129,61585.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.279455,36686 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.60228,58413 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.599172,39775.4 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.226284,36739.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.20536,41365.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.290533,38291.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.3464,493073 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_267.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_267.txt deleted file mode 100644 index 5f30e7718826a5a44d04258d0a461470bf361dfc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_267.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,173.43,1.88699e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.432465,29975.4 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.265816,29906.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2189.87,2.27982e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.447863,30708.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.377387,30700.6 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.7131,130270 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,811.672,8.66601e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.409406,31388.6 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.24188,31392.2 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1087.52,1.25855e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.404529,33567.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.265292,33563.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.50409,105686 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,385.257,4.55787e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.402116,34794.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.244031,34768 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,716.365,9.13693e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.411127,35388 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.363588,35380.6 
-Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,728.465,9.28599e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.424139,35713 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.358654,35705 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.58631,81955.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,279.577,3.55842e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.406104,34465.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.345726,34461.6 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,490.52,6.53554e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.411645,36880.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.342597,36876.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,489.563,6.56559e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.407865,37260.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.339583,37272.2 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.94554,70759.4 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,139.657,1.85316e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.406366,37318.4 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.305451,37299.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,134.329,1.84175e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.377727,37139.8 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.305733,37128.2 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,136.158,1.85446e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.384023,37040.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.294015,37029.4 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.35178,37033.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.9017,220361 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.51832,62185.8 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.266738,36835.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.56782,59012.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.579926,39988.8 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.24677,36861.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.23178,41586.6 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.293713,38444.2 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.6911,500087 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_268.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_268.txt deleted file mode 100644 index a1c035492a86751cb107c73d6ff19d309b9d549f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_268.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,172.568,1.85223e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.489962,29910.6 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.351377,29906.8 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2199.18,2.29044e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,0.448868,30589.6 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.352606,30593.2 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,11.7867,131114 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,815.728,8.70656e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.397655,31312.8 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.221695,31286 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1090.71,1.25589e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.40275,33476.8 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.249778,33457.8 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.08774,100305 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,385.327,4.53715e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.375781,32933.8 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.228966,32937.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,720.12,9.15787e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.41354,35222.4 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.365994,35242 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,727.57,9.21097e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.430333,35650.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.345554,35628.2 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.28686,78318.6 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,281.544,3.586e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.403525,36241.2 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.328901,36238.4 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,489.049,6.51004e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.41068,36818.8 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.341932,36826.4 -Relu9_f2h,0,0 -Relu9_h2f,0,0 
-Conv10,491.617,6.59014e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.413425,37181.6 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.345412,37155 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,3.67613,74222.6 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,138.692,1.8372e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.39196,37215.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.299634,37189.2 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,134.834,1.84099e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.367915,37044.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.289253,37002 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,135.723,1.85095e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.384183,36883.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.292472,36856.8 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.37167,36868.2 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,16.7821,218357 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.44907,61929 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.268958,36693.2 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.58351,58774.4 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.565629,39812.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.213682,36708.4 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.18764,42986 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.152147,36769.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.6579,496474 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_269.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_269.txt deleted file mode 100644 index ed7cda546e596b4f5d9a42cda6899c1973f370b5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp16_samp_fp16_269.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,177.146,1.92423e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.389764,29834 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.235153,29830.2 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,2208.16,2.28783e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 
-Add2,0.449534,30547.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.395057,30539.4 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,13.5003,147919 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,807.798,8.61431e+06 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.400164,31351 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.23141,31332 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,1093.81,1.25446e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.415601,33495.4 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.306174,33491.6 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,7.76251,107105 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,384.777,4.55696e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.38947,34718.6 -Add5_f2h,0,0 -Add5_h2f,0,0 -Relu5,0.231743,34680.6 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,717.842,9.11881e+06 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.41612,35364 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.359,35337.6 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,732.336,9.28302e+06 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.417719,35656 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.352651,35602.6 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,5.13727,74716.4 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,279.723,3.56724e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.404785,34507.8 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.327915,34500.2 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,493.197,6.53166e+06 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.404516,36769.4 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.347365,36761.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,491.166,6.59488e+06 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.419332,37215.2 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.34819,37192.4 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,4.09067,72479 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,140.255,1.87033e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.391978,37234.2 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.307012,37211.4 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,135.678,1.82971e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.373272,37059 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.294117,37028.6 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,137.144,1.86828e+06 -Conv13_f2h,0,0 
-Conv13_h2f,0,0 -Add13,0.36892,36914.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.294406,36903.2 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,2.33905,36899.4 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,17.0487,221581 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,2.49858,62079 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.26908,36728 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,2.60495,58928.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.591433,39885.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.24277,36751 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.24619,41498.8 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.295,38375.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,34.9505,503463 -Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp32_perf_fp32_120.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp32_perf_fp32_120.txt deleted file mode 100644 index e5708b3dbd6d733bf6b9af75952d72bb0bb9f974..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/profiling_results/vgg16_imagenet/vgg16_imagenet_fp32_perf_fp32_120.txt +++ /dev/null @@ -1,159 +0,0 @@ -Conv1,669.347,5.79186e+06 -Conv1_f2h,0,0 -Conv1_h2f,0,0 -Add1,0.44224,25607.8 -Add1_f2h,0,0 -Add1_h2f,0,0 -Relu1,0.340281,25592.6 -Relu1_f2h,0,0 -Relu1_h2f,0,0 -Conv2,7139.74,5.46598e+07 -Conv2_f2h,0,0 -Conv2_h2f,0,0 -Add2,1.47909,22940.4 -Add2_f2h,0,0 -Add2_h2f,0,0 -Relu2,0.578853,20923 -Relu2_f2h,0,0 -Relu2_h2f,0,0 -Pool1,60.6459,467598 -Pool1_f2h,0,0 -Pool1_h2f,0,0 -Conv3,1541.99,1.25939e+07 -Conv3_f2h,0,0 -Conv3_h2f,0,0 -Add3,0.449702,25722 -Add3_f2h,0,0 -Add3_h2f,0,0 -Relu3,0.310771,25726.4 -Relu3_f2h,0,0 -Relu3_h2f,0,0 -Conv4,2598.25,2.36776e+07 -Conv4_f2h,0,0 -Conv4_h2f,0,0 -Add4,0.799391,29436.6 -Add4_f2h,0,0 -Add4_h2f,0,0 -Relu4,0.599462,29567 -Relu4_f2h,0,0 -Relu4_h2f,0,0 -Pool2,21.6318,230524 -Pool2_f2h,0,0 -Pool2_h2f,0,0 -Conv5,744.163,7.87985e+06 -Conv5_f2h,0,0 -Conv5_h2f,0,0 -Add5,0.431494,32244 -Add5_f2h,0,0 
-Add5_h2f,0,0 -Relu5,0.286745,32244 -Relu5_f2h,0,0 -Relu5_h2f,0,0 -Conv6,1576.37,1.74915e+07 -Conv6_f2h,0,0 -Conv6_h2f,0,0 -Add6,0.467628,33194.6 -Add6_f2h,0,0 -Add6_h2f,0,0 -Relu6,0.387506,33190.8 -Relu6_f2h,0,0 -Relu6_h2f,0,0 -Conv7,1744.67,1.92649e+07 -Conv7_f2h,0,0 -Conv7_h2f,0,0 -Add7,0.43932,32155.4 -Add7_f2h,0,0 -Add7_h2f,0,0 -Relu7,0.295117,32174 -Relu7_f2h,0,0 -Relu7_h2f,0,0 -Pool3,11.0778,134872 -Pool3_f2h,0,0 -Pool3_h2f,0,0 -Conv8,493.441,5.963e+06 -Conv8_f2h,0,0 -Conv8_h2f,0,0 -Add8,0.433138,35087.4 -Add8_f2h,0,0 -Add8_h2f,0,0 -Relu8,0.380959,35076 -Relu8_f2h,0,0 -Relu8_h2f,0,0 -Conv9,910.678,1.20378e+07 -Conv9_f2h,0,0 -Conv9_h2f,0,0 -Add9,0.450066,38405.2 -Add9_f2h,0,0 -Add9_h2f,0,0 -Relu9,0.407878,38378.6 -Relu9_f2h,0,0 -Relu9_h2f,0,0 -Conv10,897.248,1.2633e+07 -Conv10_f2h,0,0 -Conv10_h2f,0,0 -Add10,0.449388,40364.8 -Add10_f2h,0,0 -Add10_h2f,0,0 -Relu10,0.386828,40322.8 -Relu10_f2h,0,0 -Relu10_h2f,0,0 -Pool4,6.03651,96459 -Pool4_f2h,0,0 -Pool4_h2f,0,0 -Conv11,246.238,3.40541e+06 -Conv11_f2h,0,0 -Conv11_h2f,0,0 -Add11,0.428479,40601.8 -Add11_f2h,0,0 -Add11_h2f,0,0 -Relu11,0.351365,40546 -Relu11_f2h,0,0 -Relu11_h2f,0,0 -Conv12,238.715,3.44638e+06 -Conv12_f2h,0,0 -Conv12_h2f,0,0 -Add12,0.414546,40587.4 -Add12_f2h,0,0 -Add12_h2f,0,0 -Relu12,0.273535,40563.4 -Relu12_f2h,0,0 -Relu12_h2f,0,0 -Conv13,236.791,3.49246e+06 -Conv13_f2h,0,0 -Conv13_h2f,0,0 -Add13,0.413625,40604.8 -Add13_f2h,0,0 -Add13_h2f,0,0 -Relu13,0.31061,40577 -Relu13_f2h,0,0 -Relu13_h2f,0,0 -Pool5,3.85133,81098.8 -Pool5_f2h,0,0 -Pool5_h2f,0,0 -Mul1,30.3164,365908 -Mul1_f2h,0,0 -Mul1_h2f,0,0 -Add14,3.40558,74419 -Add14_f2h,0,0 -Add14_h2f,0,0 -Relu14,0.301913,40155.8 -Relu14_f2h,0,0 -Relu14_h2f,0,0 -Mul2,4.51905,81648.2 -Mul2_f2h,0,0 -Mul2_h2f,0,0 -Add15,0.766091,44549.6 -Add15_f2h,0,0 -Add15_h2f,0,0 -Relu15,0.310483,40165.6 -Relu15_f2h,0,0 -Relu15_h2f,0,0 -Mul3,1.77246,50359.2 -Mul3_f2h,0,0 -Mul3_h2f,0,0 -Add16,0.339148,40173.6 -Add16_f2h,0,0 -Add16_h2f,0,0 -Softmax1,65.736,976903 
-Softmax1_f2h,0,0 -Softmax1_h2f,0,0 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt deleted file mode 100644 index 1b7aeb981c745717c52c841f99672cfbd532f7cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt +++ /dev/null @@ -1,231 +0,0 @@ -2725.121326 -+++++ -conf1 1 1 78.78 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf2 2.1233638648528457 1.6150951710244676 78.3544 0.42560000000000286 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf3 2.051295134864554 1.6122580072322763 78.3278 0.4522000000000048 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf4 2.188609573694276 1.688911612634961 78.30120000000001 0.47879999999999256 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 
12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf5 2.0570505767108007 1.6000014977491621 78.2214 0.5585999999999984 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 265 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf6 2.009166522889861 1.5755494376470724 78.1948 0.5852000000000004 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf7 2.0188668300066377 1.5976556515195433 78.06179999999999 0.7182000000000102 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf8 2.1797184471932716 1.6767378001241562 78.06179999999999 0.7182000000000102 -1 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf9 2.064914192886025 1.6203964986881603 78.06179999999999 0.7182000000000102 -1 gpu conv fp16 12 add fp16 12 tanh fp16 
12 pool_max fp16 12 -2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf10 2.2070171560926672 1.7194657877315815 78.0352 0.7447999999999979 -1 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 265 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf11 2.0161469236407057 1.5964768988685245 78.0086 0.7713999999999999 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf12 2.157846755426679 1.6765250202752133 78.0086 0.7713999999999999 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf13 2.0319664118931096 1.6183541826275754 77.98200000000001 0.7979999999999876 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 
12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf14 2.354997704376988 1.7779732164691666 77.98200000000001 0.7979999999999876 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv fp16 12 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf15 2.3463673263694 1.8510470086526165 77.98200000000001 0.7979999999999876 -1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf16 2.284714727579521 1.7855758235498087 77.7692 1.0108000000000033 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf17 2.3463673263694 1.8510470086526165 77.68939999999999 1.0906000000000091 -1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf18 2.427840309027486 1.9007943438562696 77.68939999999999 1.0906000000000091 -1 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 263 add 
fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf19 2.4671009475732766 1.9246545843862224 77.47659999999999 1.3034000000000106 -1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf20 2.5567127702266332 1.9773019485322874 77.2638 1.5161999999999978 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf21 2.557898283218207 1.9895818051250724 77.2372 1.5427999999999997 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf22 2.557898283218207 1.9895818051250724 77.21060000000001 1.5693999999999875 -1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 
12 add fp16 12 -7 gpu softmax fp16 12 ------ -+++++ -conf23 2.6457265307759883 2.029290916760937 77.1574 1.6226000000000056 -1 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -6 gpu mul fp16 12 add fp16 12 -7 gpu softmax fp16 12 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt deleted file mode 100644 index a888b5ee5a50d140f60d6579a3f6bdb6aa5ddfbd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt +++ /dev/null @@ -1,188 +0,0 @@ -1129.3450630000002 -+++++ -conf1 1 1 84.76 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 -6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 11 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 2.2258170210610477 1.3875307929727092 84.74 0.020000000000010232 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 151 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf3 2.3673182996864846 1.4566777038051897 84.49999999999999 0.2600000000000193 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 
12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf4 2.24614762418964 1.41800542976017 84.25999999999999 0.5000000000000142 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf5 2.304084258604824 1.4284953488024343 84.228 0.5320000000000107 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 151 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf6 2.3377766277342653 1.4440340860007412 84.228 0.5320000000000107 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -6 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf7 2.24614762418964 1.41800542976017 84.17479999999999 0.5852000000000146 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv 
samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf8 2.3673182996864846 1.4566777038051897 84.095 0.6650000000000063 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf9 2.2463714607055545 1.417884448648111 83.8024 0.9575999999999993 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf10 2.389025803395913 1.4732901147183992 83.77579999999999 0.9842000000000155 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf11 2.288831273542033 1.435952475412438 83.61619999999999 1.143800000000013 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 
tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf12 2.288831273542033 1.435952475412438 83.58959999999999 1.170400000000015 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf13 2.389025803395913 1.4732901147183992 83.58959999999999 1.170400000000015 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf14 2.3892790238475423 1.4731595166090572 83.4566 1.3034000000000106 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf15 2.390450803781405 1.4707319718833016 83.3768 1.3832000000000022 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 
gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 157 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf16 2.4373708430335537 1.49267343110314 83.3768 1.3832000000000022 -1 gpu conv fp16 11 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ -+++++ -conf17 2.4373708430335537 1.49267343110314 83.2704 1.48960000000001 -1 gpu conv fp16 12 add fp16 12 tanh fp16 12 -2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 -6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12 -7 gpu mul fp16 12 add fp16 12 -8 gpu softmax fp16 12 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt deleted file mode 100644 index 942789c1c4defd1139e75209ffbcb073a2b39b30..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt +++ /dev/null @@ -1,1576 +0,0 @@ -2593.3013975999997 -+++++ -conf1 1 1 89.42 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 -3 gpu conv fp32 11 add fp32 1 -4 gpu add fp32 11 -5 gpu relu fp32 11 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 -8 gpu add fp32 11 -9 gpu relu fp32 11 -10 
gpu conv fp32 11 add fp32 1 relu fp32 1 -11 gpu conv fp32 11 add fp32 1 -12 gpu add fp32 11 -13 gpu relu fp32 11 -14 gpu conv fp32 11 add fp32 1 relu fp32 1 -15 gpu conv fp32 11 add fp32 1 -16 gpu conv fp32 11 add fp32 1 -17 gpu add fp32 11 -18 gpu relu fp32 11 -19 gpu conv fp32 11 add fp32 1 relu fp32 1 -20 gpu conv fp32 11 add fp32 1 -21 gpu add fp32 11 -22 gpu relu fp32 11 -23 gpu conv fp32 11 add fp32 1 relu fp32 1 -24 gpu conv fp32 11 add fp32 1 -25 gpu add fp32 11 -26 gpu relu fp32 11 -27 gpu conv fp32 11 add fp32 1 relu fp32 1 -28 gpu conv fp32 11 add fp32 1 -29 gpu conv fp32 11 add fp32 1 -30 gpu add fp32 11 -31 gpu relu fp32 11 -32 gpu conv fp32 11 add fp32 1 relu fp32 1 -33 gpu conv fp32 11 add fp32 1 -34 gpu add fp32 11 -35 gpu relu fp32 11 -36 gpu conv fp32 11 add fp32 1 relu fp32 1 -37 gpu conv fp32 11 add fp32 1 -38 gpu add fp32 11 -39 gpu relu fp32 11 -40 gpu pool_mean fp32 11 -41 gpu mul fp32 11 add fp32 1 -42 gpu softmax fp32 1 ------ -+++++ -conf2 1.8227860146926984 1.3592380545823108 88.28 1.1400000000000006 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 162 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 166 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 
-30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf3 1.772745264351603 1.3340968704252147 88.2 1.2199999999999989 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 166 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf4 1.831301934833889 1.3636544094268177 88.2 1.2199999999999989 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 
-5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf5 1.7541385118416233 1.323200331238725 88.12 1.2999999999999972 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 166 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 
add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf6 1.750881760437994 1.3214899710791683 88.12 1.2999999999999972 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 166 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax 
fp16 12 ------ -+++++ -conf7 1.9207420870636576 1.4105446231099241 88.1 1.3200000000000074 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv fp16 12 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf8 1.897654446584276 1.3943617562849198 88.1 1.3200000000000074 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add 
fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 262 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf9 1.9276001243246026 1.4155139358802007 88.08 1.3400000000000034 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 168 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add 
fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 155 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf10 1.8877611861107602 1.3945090937373315 88.03999999999999 1.3800000000000097 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 154 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 166 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf11 1.884015904997108 1.386748889441216 87.96000000000001 1.4599999999999937 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 
-7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 262 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf12 1.815742308450095 1.3541765419789824 87.83999999999999 1.5800000000000125 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 262 add 
fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf13 1.928011277898605 1.414528053850526 87.83999999999999 1.5800000000000125 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 155 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf14 
1.8702574116471649 1.3838796270391824 87.8 1.6200000000000045 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 269 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf15 1.9390257777318618 1.4193909923193697 87.8 1.6200000000000045 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv 
samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 155 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf16 1.8505712546542585 1.372601565984325 87.76 1.6599999999999966 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add 
fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf17 1.931335957581042 1.4149043748735137 87.74 1.6800000000000068 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 157 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf18 1.8390656100510818 1.3668229301466752 87.68 1.7399999999999949 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add 
fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf19 1.9360126662655235 1.416245073512222 87.64 1.7800000000000011 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 155 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 264 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu 
fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf20 1.826739398491775 1.3609522133620269 87.62 1.7999999999999972 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 153 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv samp_fp16 262 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 165 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf21 1.8243322012642802 1.3542277148411042 87.62 
1.7999999999999972 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf22 1.8245510435946863 1.3601414031759373 87.58 1.8400000000000034 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 
gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv samp_fp16 269 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf23 1.9832010015590205 1.4407797001367388 87.56 1.8599999999999994 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 261 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv 
perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 155 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf24 1.831958859203629 1.3643626254848584 87.5 1.9200000000000017 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 151 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf25 1.827209961997738 1.3576190436536635 87.5 1.9200000000000017 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv 
perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 262 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf26 1.9532893879837718 1.4253186875342474 87.5 1.9200000000000017 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 168 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 262 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 
relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf27 1.8598315807624513 1.376813374656673 87.48 1.9399999999999977 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf28 1.8545931630272876 1.3744725755811524 87.48 1.9399999999999977 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu 
conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 267 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 152 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf29 1.9088935397779812 1.4033062374488858 87.44 1.980000000000004 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 163 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 
gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 267 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf30 1.8306014158563824 1.3613821654101905 87.44 1.980000000000004 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 265 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 168 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 262 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 
gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf31 1.9755297077095708 1.4378811225069261 87.44 1.980000000000004 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 159 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 155 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf32 1.827200177575606 1.356175543415313 87.38 2.0400000000000063 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 
12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 264 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv perf_fp16 167 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf33 1.8517276001191023 1.3729319418960464 87.38 2.0400000000000063 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -7 gpu conv fp16 12 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv samp_fp16 269 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 157 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12 -24 gpu conv perf_fp16 160 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv samp_fp16 268 add 
fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 269 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf34 1.8938192956663813 1.3919348631813433 87.38 2.0400000000000063 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv fp16 11 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 262 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ -+++++ -conf35 1.8989539669005067 1.3938360809175603 87.36 2.0600000000000023 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv fp16 12 add fp16 12 relu fp16 12 -3 gpu conv fp16 12 add fp16 12 
-4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 263 add fp16 12 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12 -11 gpu conv perf_fp16 154 add fp16 12 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 12 relu fp16 12 -15 gpu conv fp16 12 add fp16 12 -16 gpu conv fp16 11 add fp16 12 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 -20 gpu conv perf_fp16 151 add fp16 12 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 157 add fp16 12 relu fp16 12 -24 gpu conv samp_fp16 268 add fp16 12 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -28 gpu conv fp16 12 add fp16 12 -29 gpu conv perf_fp16 154 add fp16 12 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 12 relu fp16 12 -33 gpu conv fp16 12 add fp16 12 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -37 gpu conv samp_fp16 262 add fp16 12 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 12 -42 gpu softmax fp16 12 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt deleted file mode 100644 index 789f4e21cf4a778535d1df0f9f7be22c1415d672..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt +++ /dev/null @@ -1,1027 +0,0 @@ -3994.0731450000017 -+++++ -conf1 1 1 89.22 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 
-6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.3049904288987464 1.6887800235455193 89.14 0.0799999999999983 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv fp16 11 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf3 2.357615734902983 1.7226289827534114 89.14 0.0799999999999983 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv 
samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf4 2.3831343547359976 1.7374446557158316 88.84 0.37999999999999545 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf5 2.3696393667573616 1.7284732038695636 88.8 0.4200000000000017 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 155 add fp16 12 
relu fp16 12 -13 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf6 2.4444787116056292 1.7833916898567774 88.58 0.6400000000000006 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf7 2.40209759505425 1.7661661942711917 88.58 0.6400000000000006 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 
gpu softmax fp16 12 ------ -+++++ -conf8 2.528892013058046 1.8332619869789675 88.08 1.1400000000000006 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf9 2.5283008295291105 1.8324605771289624 88.06 1.1599999999999966 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf10 2.5562616043247313 1.847605117430125 88.03999999999999 1.1800000000000068 -1 gpu conv fp16 11 add fp16 12 
relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf11 2.5337351216813757 1.836759334487813 88.03999999999999 1.1800000000000068 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf12 2.556171297969468 1.8482604143790797 88.03999999999999 1.1800000000000068 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu 
fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf13 2.5562385363337343 1.8481145682015834 88.03999999999999 1.1800000000000068 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf14 2.556612910921585 1.8486422226408725 88.03999999999999 1.1800000000000068 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 
gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf15 2.5419253262471346 1.8395765136023223 88.02 1.2000000000000028 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 263 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf16 2.4937721600323406 1.8116328904640306 88.0 1.2199999999999989 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add 
fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf17 2.5545877208248187 1.8465313171321942 88.0 1.2199999999999989 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf18 2.528537397828869 1.8330988121074523 88.0 1.2199999999999989 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu 
conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf19 2.531670576114998 1.8357132731685366 88.0 1.2199999999999989 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf20 2.5294693760803577 1.8335105878862015 87.98 1.2399999999999949 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 268 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 
pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf21 2.5582293136941723 1.8476583031165972 87.98 1.2399999999999949 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf22 2.556327374925176 1.8481587827658859 87.98 1.2399999999999949 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf23 
2.557806470696261 1.8492020211230846 87.98 1.2399999999999949 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf24 2.5545697480449 1.8464092920718178 87.96000000000001 1.259999999999991 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 267 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf25 2.528206406642683 1.832658178797549 87.96000000000001 1.259999999999991 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 
261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf26 2.556533707152568 1.8484262997816934 87.96000000000001 1.259999999999991 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf27 2.5393059900815325 1.837123626585959 87.94 1.2800000000000011 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 
relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf28 2.5486219361262235 1.845481069177171 87.94 1.2800000000000011 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf29 2.5485321687357825 1.8461348600374907 87.94 1.2800000000000011 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 
151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf30 2.5657339222733015 1.8517901869245543 87.92 1.2999999999999972 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 263 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf31 2.581139532058275 1.860666047394923 87.92 1.2999999999999972 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 
-10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf32 2.5098654459068945 1.8297655130336108 87.92 1.2999999999999972 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf33 2.528587182046725 1.8312521826965082 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 266 
add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf34 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf35 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add 
fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf36 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf37 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf38 2.5346932948358267 1.8376287813464989 87.9 1.3199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 
gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf39 2.4914548049246 1.8095620501702707 87.86 1.3599999999999994 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 268 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf40 2.5809312104420865 1.8607657818447936 87.86 1.3599999999999994 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 
12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf41 2.5120056276901925 1.824277681148882 87.83999999999999 1.3800000000000097 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 268 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf42 2.556168516896762 1.849243225747987 87.83999999999999 1.3800000000000097 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 
12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf43 2.512713457130698 1.8053797549107755 87.82 1.4000000000000057 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf44 2.509447559327321 1.8294109824358684 87.82 1.4000000000000057 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 
add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf45 2.532043246184595 1.8347717424454622 87.74 1.480000000000004 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf46 2.4911011329750212 1.795311376068545 87.68 1.539999999999992 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 
gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf47 2.549746515565958 1.8283676275816687 87.66000000000001 1.559999999999988 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv fp16 12 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf48 2.51145215830771 1.8254971754777813 87.64 1.5799999999999983 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 
add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf49 2.513356522647888 1.826263067419964 87.58 1.6400000000000006 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf50 2.513356522647888 1.826263067419964 87.53999999999999 1.6800000000000068 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf51 2.4881677905203494 
1.8127135485543127 87.4 1.8199999999999932 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf52 2.51145215830771 1.8254971754777813 87.36 1.8599999999999994 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf53 2.4757784613808234 1.7991027289904775 87.26 1.9599999999999937 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max 
fp16 12 -3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf54 2.5913526715019284 1.8695479088125426 87.24 1.980000000000004 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt deleted file mode 100644 index 
ef6509b99bee287bf0e3dfbaa035d51f9e3cb0ea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt +++ /dev/null @@ -1,210 +0,0 @@ -3845.438677999999 -+++++ -conf1 1 1 68.42 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.4361074671227554 1.7555866253938424 67.22 1.2000000000000028 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv fp16 11 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 264 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf3 2.602684148359414 1.8286503060252126 
67.10000000000001 1.3199999999999932 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv fp16 11 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf4 2.661880095451371 1.886369953641946 67.06 1.3599999999999994 -1 gpu conv fp16 12 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf5 2.5990656605003855 1.8588553950032938 66.84 1.5799999999999983 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu 
conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf6 2.5884968081531485 1.8594972115815722 66.8 1.6200000000000045 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf7 2.4323231936537972 1.8028228076034056 66.8 1.6200000000000045 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu 
fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf8 2.575472326184571 1.8375078883357683 66.72 1.7000000000000028 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv fp16 11 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf9 2.4912510106198957 1.848807665058795 66.58 1.8400000000000034 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 
155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf10 2.4323231936537972 1.8028228076034056 66.53999999999999 1.8800000000000097 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ -+++++ -conf11 2.4027045398540046 1.7853827712848849 66.47999999999999 1.940000000000012 -1 gpu conv fp16 11 add fp16 12 relu fp16 12 -2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12 -3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -5 gpu conv fp16 12 add fp16 12 relu fp16 12 -6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 -7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 -9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 -10 gpu conv samp_fp16 262 add fp16 12 relu 
fp16 12 pool_max fp16 12 -11 gpu conv perf_fp16 160 add fp16 12 relu fp16 12 -12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 -13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12 -14 gpu mul fp16 12 add fp16 12 relu fp16 12 -15 gpu mul fp16 12 add fp16 12 -16 gpu softmax fp16 12 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/README.md b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet2_cifar10/alexnet2_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet2_cifar10/alexnet2_cifar10.txt deleted file mode 100644 index 208f154e02ef37a6ae87904844c826ce72012b32..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet2_cifar10/alexnet2_cifar10.txt +++ /dev/null @@ -1,23 +0,0 @@ -1114.3009809999999 -+++++ -conf1 1 1 84.76 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 -6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 11 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 1.678391931801309 1.4393008204786808 84.76 0.0 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_cifar10/alexnet_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_cifar10/alexnet_cifar10.txt deleted file mode 100644 index eba22e3f01e227041fcb406f87a996837cd5fa2b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_cifar10/alexnet_cifar10.txt +++ /dev/null @@ -1,421 +0,0 @@ -2592.187221 -+++++ -conf1 1 1 78.78 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf2 1.7593976485873195 1.6193399031642917 78.78 0.0 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf3 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf4 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu 
mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf5 2.2627828537139263 2.065683616898884 78.34 0.4399999999999977 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf6 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf7 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf8 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf9 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 
add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf10 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf11 2.4370818494175888 2.250857540113024 77.98 0.7999999999999972 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf12 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf13 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf14 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf15 2.228328207535687 2.0675123320068267 77.82 0.960000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf16 2.228328207535687 2.0675123320068267 77.82 0.960000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf17 2.3417491169395532 2.1355030360671465 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf18 2.3417491169395532 2.1355030360671465 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf19 2.3417491169395532 2.1355030360671465 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf20 2.5243776633638846 2.324968713897418 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf21 2.5243776633638846 2.324968713897418 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf22 2.5243776633638846 2.324968713897418 77.78 1.0 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf23 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf24 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 
-2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf25 2.472472828611022 2.286262888143739 77.48 1.2999999999999972 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf26 2.574475112841438 2.3637004022727544 77.4 1.3799999999999955 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf27 2.1200397577541747 1.951741010849448 77.3 1.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf28 2.1200397577541747 1.951741010849448 77.3 1.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf29 2.5289288699015304 2.334007588396142 77.2 
1.5799999999999983 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf30 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf31 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf32 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf33 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 
gpu softmax fp32 1 ------ -+++++ -conf34 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf35 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf36 2.4768386387310675 2.295002745725082 76.94 1.8400000000000034 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf37 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf38 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 
262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf39 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf40 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf41 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf42 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_imagenet/alexnet_imagenet.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_imagenet/alexnet_imagenet.txt deleted file mode 100644 index 8ae986b90ce53e80d10e19525a51ec32f51397d8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/alexnet_imagenet/alexnet_imagenet.txt +++ /dev/null @@ -1,289 +0,0 @@ -2739.950736 -+++++ -conf1 1 1 56.3 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 relu fp32 1 -7 gpu mul fp32 11 add fp32 1 relu fp32 1 -8 gpu mul fp32 11 add fp32 1 -9 gpu softmax fp32 1 ------ -+++++ -conf2 1.802133644103582 1.8186433204507424 55.76 0.5399999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf3 2.0227701930718065 2.043112495268932 55.42 0.8799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf4 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu 
conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf5 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf6 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf7 2.085011755614172 2.122606306624671 54.92 1.3799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf8 2.085011755614172 2.122606306624671 54.92 1.3799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add 
fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf9 1.8052659214923805 1.8217111622759978 54.82 1.4799999999999969 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf10 2.0146435217865446 2.0367475358800102 54.58 1.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf11 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf12 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 
gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf13 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf14 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf15 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf16 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add 
fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf17 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf18 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf19 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf20 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 
gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf21 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf22 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf23 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf24 2.0185588815268836 2.0405961127674277 53.559999999999995 2.740000000000002 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 
pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/lenet_keras/lenet_keras.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/lenet_keras/lenet_keras.txt deleted file mode 100644 index da88f7cd26b049fd18644a834e4d34b944149cb2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/lenet_keras/lenet_keras.txt +++ /dev/null @@ -1,409 +0,0 @@ -282.5141369999999 -+++++ -conf1 1 1 98.7 0.0 -1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp32 11 add fp32 1 tanh fp32 1 -4 gpu mul fp32 11 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf2 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf3 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf4 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 
------ -+++++ -conf5 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf6 1.6415764141643088 1.8012120076077847 98.66 0.04000000000000625 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf7 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf8 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf9 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf10 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf11 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 -1 
gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf12 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf13 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf14 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf15 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf16 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf17 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf18 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf19 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf20 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf21 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf22 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf23 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf24 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf25 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf26 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf27 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf28 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf29 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh 
fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf30 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853 -1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf31 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853 -1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf32 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf33 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf34 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf35 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf36 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf37 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf38 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf39 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf40 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf41 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf42 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv 
perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf43 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf44 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf45 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf46 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf47 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf48 1.6319327047042609 1.8046853367113418 98.54 0.1599999999999966 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 
263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf49 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf50 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf51 1.6510114896409525 1.8591762752048948 98.48 0.21999999999999886 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/mobilenet_cifar10/mobilenet_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/mobilenet_cifar10/mobilenet_cifar10.txt deleted file mode 100644 index 93ca37c00a73f1a1cfc72bf58e8067906269d813..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/mobilenet_cifar10/mobilenet_cifar10.txt +++ /dev/null @@ -1,871 +0,0 @@ -4077.307063200001 -+++++ -conf1 1 1 84.42 0.0 -1 gpu conv fp32 11 -2 gpu batchnorm fp32 11 -3 gpu relu fp32 11 -4 gpu group_conv fp32 11 -5 gpu batchnorm fp32 11 -6 gpu relu fp32 11 -7 gpu conv fp32 11 -8 gpu batchnorm fp32 11 -9 gpu relu fp32 11 -10 gpu group_conv fp32 11 -11 gpu batchnorm fp32 11 -12 gpu relu fp32 11 -13 gpu conv fp32 11 -14 gpu batchnorm fp32 11 -15 gpu relu 
fp32 11 -16 gpu group_conv fp32 11 -17 gpu batchnorm fp32 11 -18 gpu relu fp32 11 -19 gpu conv fp32 11 -20 gpu batchnorm fp32 11 -21 gpu relu fp32 11 -22 gpu group_conv fp32 11 -23 gpu batchnorm fp32 11 -24 gpu relu fp32 11 -25 gpu conv fp32 11 -26 gpu batchnorm fp32 11 -27 gpu relu fp32 11 -28 gpu group_conv fp32 11 -29 gpu batchnorm fp32 11 -30 gpu relu fp32 11 -31 gpu conv fp32 11 -32 gpu batchnorm fp32 11 -33 gpu relu fp32 11 -34 gpu group_conv fp32 11 -35 gpu batchnorm fp32 11 -36 gpu relu fp32 11 -37 gpu conv fp32 11 -38 gpu batchnorm fp32 11 -39 gpu relu fp32 11 -40 gpu group_conv fp32 11 -41 gpu batchnorm fp32 11 -42 gpu relu fp32 11 -43 gpu conv fp32 11 -44 gpu batchnorm fp32 11 -45 gpu relu fp32 11 -46 gpu group_conv fp32 11 -47 gpu batchnorm fp32 11 -48 gpu relu fp32 11 -49 gpu conv fp32 11 -50 gpu batchnorm fp32 11 -51 gpu relu fp32 11 -52 gpu group_conv fp32 11 -53 gpu batchnorm fp32 11 -54 gpu relu fp32 11 -55 gpu conv fp32 11 -56 gpu batchnorm fp32 11 -57 gpu relu fp32 11 -58 gpu group_conv fp32 11 -59 gpu batchnorm fp32 11 -60 gpu relu fp32 11 -61 gpu conv fp32 11 -62 gpu batchnorm fp32 11 -63 gpu relu fp32 11 -64 gpu group_conv fp32 11 -65 gpu batchnorm fp32 11 -66 gpu relu fp32 11 -67 gpu conv fp32 11 -68 gpu batchnorm fp32 11 -69 gpu relu fp32 11 -70 gpu group_conv fp32 11 -71 gpu batchnorm fp32 11 -72 gpu relu fp32 11 -73 gpu conv fp32 11 -74 gpu batchnorm fp32 11 -75 gpu relu fp32 11 -76 gpu group_conv fp32 11 -77 gpu batchnorm fp32 11 -78 gpu relu fp32 11 -79 gpu conv fp32 11 -80 gpu batchnorm fp32 11 -81 gpu relu fp32 11 -82 gpu pool_mean fp32 11 -83 gpu mul fp32 11 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf2 1.504059255565631 1.4598468219902432 81.86 2.5600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu 
fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf3 1.5040783418076804 1.459845395800413 81.86 2.5600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu 
conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 152 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf4 1.5042737817275433 1.4598464522370567 81.74 2.680000000000007 -1 gpu conv fp16 12 
-2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 
gpu softmax fp32 1 ------ -+++++ -conf5 1.5042737817275433 1.4598464522370567 81.74 2.680000000000007 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv 
perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf6 1.5070383438802568 1.463241585164149 81.69999999999999 2.720000000000013 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 
-74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf7 1.5070575058058588 1.463240152333617 81.58 2.8400000000000034 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 152 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 
-69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf8 1.5039678813445672 1.4598454486222088 81.56 2.8599999999999994 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 152 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 
gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf9 1.5038655354281372 1.4599130636549171 81.46 2.960000000000008 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 161 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 152 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 
-59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 152 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf10 1.4785375660713596 1.4280520288797043 84.42 0.0 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv fp16 12 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv 
fp16 12 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv fp16 12 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv fp16 12 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet18_cifar10/resnet18_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet18_cifar10/resnet18_cifar10.txt deleted file mode 100644 index d1d75a011e9ada7994dcd5a31ee5d56fc2ee3e2f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet18_cifar10/resnet18_cifar10.txt +++ /dev/null @@ -1,91 +0,0 @@ -2484.981244 -+++++ -conf1 1 1 89.42 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 -3 gpu conv fp32 11 add fp32 1 -4 gpu add fp32 11 -5 gpu relu fp32 11 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 -8 gpu add fp32 11 -9 gpu relu fp32 11 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 -11 gpu conv fp32 11 add fp32 1 -12 gpu add fp32 11 -13 gpu relu fp32 11 -14 gpu conv fp32 11 add fp32 1 relu fp32 1 -15 gpu conv fp32 11 add fp32 1 -16 gpu conv fp32 11 add fp32 1 -17 gpu add fp32 11 -18 gpu relu fp32 11 -19 gpu conv fp32 11 add fp32 1 relu fp32 1 -20 gpu conv fp32 11 add fp32 1 -21 gpu add fp32 11 -22 gpu relu fp32 11 -23 gpu conv fp32 11 add fp32 1 relu fp32 1 -24 gpu conv fp32 11 add fp32 1 -25 gpu add fp32 11 -26 gpu relu 
fp32 11 -27 gpu conv fp32 11 add fp32 1 relu fp32 1 -28 gpu conv fp32 11 add fp32 1 -29 gpu conv fp32 11 add fp32 1 -30 gpu add fp32 11 -31 gpu relu fp32 11 -32 gpu conv fp32 11 add fp32 1 relu fp32 1 -33 gpu conv fp32 11 add fp32 1 -34 gpu add fp32 11 -35 gpu relu fp32 11 -36 gpu conv fp32 11 add fp32 1 relu fp32 1 -37 gpu conv fp32 11 add fp32 1 -38 gpu add fp32 11 -39 gpu relu fp32 11 -40 gpu pool_mean fp32 11 -41 gpu mul fp32 11 add fp32 1 -42 gpu softmax fp32 1 ------ -+++++ -conf2 1.3617910209460897 1.3866827244386561 89.42 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 -11 gpu conv fp16 12 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 12 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 add fp16 1 relu fp16 1 -20 gpu conv fp16 12 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv fp16 12 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 12 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv fp16 12 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet50_imagenet/resnet50_imagenet.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet50_imagenet/resnet50_imagenet.txt deleted file mode 100644 index a045011580adb912289364d35fb85668e74261e7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/resnet50_imagenet/resnet50_imagenet.txt +++ /dev/null @@ -1,1233 +0,0 @@ -7161.053769000008 -+++++ -conf1 1 1 75.7 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu batchnorm fp32 11 -3 gpu conv fp32 11 add fp32 1 -4 gpu batchnorm fp32 11 -5 gpu relu fp32 11 -6 gpu conv fp32 11 add fp32 1 -7 gpu batchnorm fp32 11 -8 gpu relu fp32 11 -9 gpu conv fp32 11 add fp32 1 -10 gpu batchnorm fp32 11 -11 gpu conv fp32 11 add fp32 1 -12 gpu batchnorm fp32 11 -13 gpu add fp32 11 -14 gpu relu fp32 11 -15 gpu conv fp32 11 add fp32 1 -16 gpu batchnorm fp32 11 -17 gpu relu fp32 11 -18 gpu conv fp32 11 add fp32 1 -19 gpu batchnorm fp32 11 -20 gpu relu fp32 11 -21 gpu conv fp32 11 add fp32 1 -22 gpu batchnorm fp32 11 -23 gpu add fp32 11 -24 gpu relu fp32 11 -25 gpu conv fp32 11 add fp32 1 -26 gpu batchnorm fp32 11 -27 gpu relu fp32 11 -28 gpu conv fp32 11 add fp32 1 -29 gpu batchnorm fp32 11 -30 gpu relu fp32 11 -31 gpu conv fp32 11 add fp32 1 -32 gpu batchnorm fp32 11 -33 gpu add fp32 11 -34 gpu relu fp32 11 -35 gpu conv fp32 11 add fp32 1 -36 gpu batchnorm fp32 11 -37 gpu relu fp32 11 -38 gpu conv fp32 11 add fp32 1 -39 gpu batchnorm fp32 11 -40 gpu relu fp32 11 -41 gpu conv fp32 11 add fp32 1 -42 gpu batchnorm fp32 11 -43 gpu conv fp32 11 add fp32 1 -44 gpu batchnorm fp32 11 -45 gpu add fp32 11 -46 gpu relu fp32 11 -47 gpu conv fp32 11 add fp32 1 -48 gpu batchnorm fp32 11 -49 gpu relu fp32 11 -50 gpu conv fp32 11 add fp32 1 -51 gpu batchnorm fp32 11 -52 gpu relu fp32 11 -53 gpu conv fp32 11 add fp32 1 -54 gpu batchnorm fp32 11 -55 gpu add fp32 11 -56 gpu relu fp32 11 -57 gpu conv fp32 11 add fp32 1 -58 gpu batchnorm fp32 11 -59 gpu relu fp32 11 -60 gpu conv fp32 11 
add fp32 1 -61 gpu batchnorm fp32 11 -62 gpu relu fp32 11 -63 gpu conv fp32 11 add fp32 1 -64 gpu batchnorm fp32 11 -65 gpu add fp32 11 -66 gpu relu fp32 11 -67 gpu conv fp32 11 add fp32 1 -68 gpu batchnorm fp32 11 -69 gpu relu fp32 11 -70 gpu conv fp32 11 add fp32 1 -71 gpu batchnorm fp32 11 -72 gpu relu fp32 11 -73 gpu conv fp32 11 add fp32 1 -74 gpu batchnorm fp32 11 -75 gpu add fp32 11 -76 gpu relu fp32 11 -77 gpu conv fp32 11 add fp32 1 -78 gpu batchnorm fp32 11 -79 gpu relu fp32 11 -80 gpu conv fp32 11 add fp32 1 -81 gpu batchnorm fp32 11 -82 gpu relu fp32 11 -83 gpu conv fp32 11 add fp32 1 -84 gpu batchnorm fp32 11 -85 gpu conv fp32 11 add fp32 1 -86 gpu batchnorm fp32 11 -87 gpu add fp32 11 -88 gpu relu fp32 11 -89 gpu conv fp32 11 add fp32 1 -90 gpu batchnorm fp32 11 -91 gpu relu fp32 11 -92 gpu conv fp32 11 add fp32 1 -93 gpu batchnorm fp32 11 -94 gpu relu fp32 11 -95 gpu conv fp32 11 add fp32 1 -96 gpu batchnorm fp32 11 -97 gpu add fp32 11 -98 gpu relu fp32 11 -99 gpu conv fp32 11 add fp32 1 -100 gpu batchnorm fp32 11 -101 gpu relu fp32 11 -102 gpu conv fp32 11 add fp32 1 -103 gpu batchnorm fp32 11 -104 gpu relu fp32 11 -105 gpu conv fp32 11 add fp32 1 -106 gpu batchnorm fp32 11 -107 gpu add fp32 11 -108 gpu relu fp32 11 -109 gpu conv fp32 11 add fp32 1 -110 gpu batchnorm fp32 11 -111 gpu relu fp32 11 -112 gpu conv fp32 11 add fp32 1 -113 gpu batchnorm fp32 11 -114 gpu relu fp32 11 -115 gpu conv fp32 11 add fp32 1 -116 gpu batchnorm fp32 11 -117 gpu add fp32 11 -118 gpu relu fp32 11 -119 gpu conv fp32 11 add fp32 1 -120 gpu batchnorm fp32 11 -121 gpu relu fp32 11 -122 gpu conv fp32 11 add fp32 1 -123 gpu batchnorm fp32 11 -124 gpu relu fp32 11 -125 gpu conv fp32 11 add fp32 1 -126 gpu batchnorm fp32 11 -127 gpu add fp32 11 -128 gpu relu fp32 11 -129 gpu conv fp32 11 add fp32 1 -130 gpu batchnorm fp32 11 -131 gpu relu fp32 11 -132 gpu conv fp32 11 add fp32 1 -133 gpu batchnorm fp32 11 -134 gpu relu fp32 11 -135 gpu conv fp32 11 add fp32 1 -136 gpu 
batchnorm fp32 11 -137 gpu add fp32 11 -138 gpu relu fp32 11 -139 gpu conv fp32 11 add fp32 1 -140 gpu batchnorm fp32 11 -141 gpu relu fp32 11 -142 gpu conv fp32 11 add fp32 1 -143 gpu batchnorm fp32 11 -144 gpu relu fp32 11 -145 gpu conv fp32 11 add fp32 1 -146 gpu batchnorm fp32 11 -147 gpu conv fp32 11 add fp32 1 -148 gpu batchnorm fp32 11 -149 gpu add fp32 11 -150 gpu relu fp32 11 -151 gpu conv fp32 11 add fp32 1 -152 gpu batchnorm fp32 11 -153 gpu relu fp32 11 -154 gpu conv fp32 11 add fp32 1 -155 gpu batchnorm fp32 11 -156 gpu relu fp32 11 -157 gpu conv fp32 11 add fp32 1 -158 gpu batchnorm fp32 11 -159 gpu add fp32 11 -160 gpu relu fp32 11 -161 gpu conv fp32 11 add fp32 1 -162 gpu batchnorm fp32 11 -163 gpu relu fp32 11 -164 gpu conv fp32 11 add fp32 1 -165 gpu batchnorm fp32 11 -166 gpu relu fp32 11 -167 gpu conv fp32 11 add fp32 1 -168 gpu batchnorm fp32 11 -169 gpu add fp32 11 -170 gpu relu fp32 11 -171 gpu pool_max fp32 11 -172 gpu mul fp32 11 add fp32 1 -173 gpu softmax fp32 1 ------ -+++++ -conf2 1.8254789092281507 1.4527803526239977 75.7 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 
add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv 
fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf3 1.8521749055745271 1.472492365706726 75.02 0.6800000000000068 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 
-10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv perf_fp16 160 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 11 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm 
fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 11 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv perf_fp16 164 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add 
fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf4 1.8509087142956673 1.4713858340895483 74.68 1.019999999999996 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv perf_fp16 160 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 
1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 
12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf5 1.8538077719438253 1.4749308494814874 73.82 1.8800000000000097 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv perf_fp16 160 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 11 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv 
fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv perf_fp16 153 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 11 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv perf_fp16 164 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu 
fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv samp_fp16 268 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 11 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 11 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf6 1.8538077719438253 1.4749308494814874 73.7 2.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 
add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv perf_fp16 160 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 11 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv perf_fp16 153 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 
gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 11 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv perf_fp16 164 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv samp_fp16 268 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 
gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf7 1.8577902325643394 1.478552049679054 72.82 2.8800000000000097 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv perf_fp16 160 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 11 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 11 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv samp_fp16 268 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 
gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 11 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv perf_fp16 164 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv samp_fp16 268 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv 
perf_fp16 158 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 11 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar10/vgg16_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar10/vgg16_cifar10.txt deleted file mode 100644 index f4e185f358dbd2282b14c0865d829903d2d270e9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar10/vgg16_cifar10.txt +++ /dev/null @@ -1,58 +0,0 @@ -3776.508929999999 -+++++ -conf1 1 1 89.96 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 
gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.4192803184847484 2.2393153800931898 89.22 0.7399999999999949 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf3 2.1240075032467187 1.9749367321301132 88.64 1.3199999999999932 -1 gpu conv fp16 11 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu 
fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar100/vgg16_cifar100.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar100/vgg16_cifar100.txt deleted file mode 100644 index b55bb668b140ebcc9ee911f728726afed7274f85..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_cifar100/vgg16_cifar100.txt +++ /dev/null @@ -1,77 +0,0 @@ -3768.819777999999 -+++++ -conf1 1 1 66.5 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 
gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf3 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf4 2.664296720624579 2.427276363573644 64.7 1.7999999999999972 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ diff --git 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_imagenet/vgg16_imagenet.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_imagenet/vgg16_imagenet.txt deleted file mode 100644 index d0a23ffb10367c45ab76e4477f29932a5431e68b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/empirical/vgg16_imagenet/vgg16_imagenet.txt +++ /dev/null @@ -1,41 +0,0 @@ -19194.623482 -+++++ -conf1 1 1 72.84 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 relu fp32 1 -16 gpu mul fp32 11 add fp32 1 -17 gpu softmax fp32 1 ------ -+++++ -conf2 1.7719381411481732 1.5850925672384186 72.84 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 
1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet2_cifar10/alexnet2_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet2_cifar10/alexnet2_cifar10.txt deleted file mode 100644 index 6ec4a06d3dbd2e088d6db287d23dd3bd5aad7ddb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet2_cifar10/alexnet2_cifar10.txt +++ /dev/null @@ -1,419 +0,0 @@ -1114.3009809999999 -+++++ -conf1 1 1 84.98 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 -6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 11 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 2.4248748377353113 2.0815908534183163 84.5 0.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf3 2.4055188425519614 2.0586265720811823 84.48 0.5 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf4 2.4156140842962985 
2.0617867479342706 84.28 0.7000000000000028 -1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf5 2.396416918342732 2.0506214971794585 84.02 0.960000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf6 2.463002582910052 2.1171077568609458 83.84 1.1400000000000006 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf7 2.360283215266004 2.0255245321874304 83.78 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf8 2.4140791541736157 2.0671513522247653 83.74000000000001 1.2399999999999949 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf9 2.457753689612079 2.1086250651240137 83.7 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf10 2.459170454055443 2.1111925341396343 83.7 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 164 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf11 2.4135986141645764 2.060453960420927 83.62 1.3599999999999994 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf12 2.4631278039012106 2.1092094797926637 83.58 1.4000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf13 2.535761391794481 2.16998336112692 83.58 1.4000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf14 2.289006193945062 1.961240158652051 83.54 1.4399999999999977 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf15 2.4257674844112573 2.0808440756495563 83.5 1.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 161 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf16 2.458122368488622 2.109531159729078 83.48 1.5 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu 
softmax fp32 1 ------ -+++++ -conf17 2.281072202152105 1.9539314420536427 83.46000000000001 1.519999999999996 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf18 2.4572171342078444 2.1088933553775697 83.46000000000001 1.519999999999996 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf19 2.3017607719030058 1.9782265708150768 83.42 1.5600000000000023 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf20 2.379206814483014 2.047909200292713 83.39999999999999 1.5800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 151 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf21 2.4636282705302537 2.1162281156388527 83.39999999999999 
1.5800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf22 2.461590101374146 2.1108493881199184 83.22 1.7600000000000051 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 161 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf23 2.537054645442804 2.167568834938183 83.22 1.7600000000000051 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf24 2.4631604723407885 2.1099694757102845 83.17999999999999 1.8000000000000114 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf25 2.4636282705302537 2.1162281156388527 83.14 1.8400000000000034 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf26 2.462588899729088 2.109477918791931 83.14 1.8400000000000034 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf27 2.4638085754689025 2.1071960926343603 83.1 1.8800000000000097 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf28 2.4640079766123635 2.110326453157297 83.08 1.9000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf29 2.459337622764853 2.107249218450713 83.06 1.9200000000000017 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 
add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf30 2.538176340059405 2.173287257415721 83.02000000000001 1.9599999999999937 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 164 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf31 2.3905426931959846 2.044333576277581 83.02000000000001 1.9599999999999937 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf32 2.459337622764853 2.107249218450713 83.0 1.980000000000004 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf33 2.458968579288317 2.1063450826631396 82.89999999999999 2.0800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 
12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf34 2.2912974651603877 1.9670210508860688 82.8 2.180000000000007 -1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf35 2.4648489763056327 2.113931670664391 82.66 2.3200000000000074 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf36 2.4599076869402854 2.1077397371200193 82.6 2.3800000000000097 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf37 2.4636282705302537 2.1162281156388527 82.54 2.4399999999999977 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf38 2.591814267389778 2.222680944458784 82.26 2.719999999999999 -1 gpu conv 
fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_cifar10/alexnet_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_cifar10/alexnet_cifar10.txt deleted file mode 100644 index a9ccba6eb63f620c0e3b6f95fd7c50892018f00f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_cifar10/alexnet_cifar10.txt +++ /dev/null @@ -1,511 +0,0 @@ -2592.187221 -+++++ -conf1 1 1 79.28 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf2 1.7593976485873195 1.6193399031642917 79.23 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf3 2.092625440752526 1.9139078015388271 78.96 0.3200000000000074 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add 
fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf4 1.8870195448805414 1.7296919053025768 78.8 0.480000000000004 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf5 2.1184804041774554 1.9598989563949536 78.75999999999999 0.5200000000000102 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf6 2.1184804041774554 1.9598989563949536 78.75999999999999 0.5200000000000102 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf7 2.0933825381386364 1.9150743378318535 78.64 0.6400000000000006 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf8 2.081712090729918 1.9102226906341664 78.5 0.7800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 
12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf9 2.081712090729918 1.9102226906341664 78.5 0.7800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf10 2.2662606588487595 2.066560750795139 78.48 0.7999999999999972 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf11 2.121684761285686 1.966318179285323 78.48 0.7999999999999972 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf12 2.3417491169395532 2.1355030360671465 78.38000000000001 0.8999999999999915 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf13 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh 
fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf14 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf15 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf16 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf17 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf18 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf19 2.146571989407323 1.95711703610764 78.18 1.0999999999999943 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf20 2.303316973793268 2.1036463961913276 78.10000000000001 1.1799999999999926 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf21 2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf22 2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf23 
2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf24 2.1106508925330925 1.9419233584234938 78.06 1.2199999999999989 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf25 2.3203534290038634 2.116965679235447 78.06 1.2199999999999989 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf26 2.3527290658539215 2.145832257234814 78.03999999999999 1.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf27 2.3527290658539215 2.145832257234814 78.03999999999999 1.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 
pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf28 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf29 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf30 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf31 2.3137982135449207 2.1281257317083417 77.84 1.4399999999999977 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf32 2.1198074418988333 1.9522214255218437 77.82 1.460000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv 
samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf33 2.246924974355375 2.065289762405701 77.8 1.480000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf34 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf35 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf36 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf37 2.5289288699015304 2.334007588396142 77.72 1.5600000000000023 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf38 2.5289288699015304 2.334007588396142 77.72 1.5600000000000023 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf39 2.3117594882585775 2.1152397180868943 77.56 1.7199999999999989 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf40 2.452732477854469 2.264573687601476 77.56 1.7199999999999989 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf41 2.452732477854469 2.264573687601476 77.56 1.7199999999999989 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf42 2.382518688546389 2.178614303992064 77.5 1.7800000000000011 -1 gpu conv 
samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf43 2.382518688546389 2.178614303992064 77.5 1.7800000000000011 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf44 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf45 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf46 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 
------ -+++++ -conf47 2.4835281673276515 2.279527076032239 77.3 1.980000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf48 2.4835281673276515 2.279527076032239 77.3 1.980000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf49 2.1553694968551302 1.9959124044028933 77.18 2.0999999999999943 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf50 2.5877520959724816 2.3763616521050364 77.03999999999999 2.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf51 2.5877520959724816 2.3763616521050364 77.03999999999999 2.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add 
fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_imagenet/alexnet_imagenet.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_imagenet/alexnet_imagenet.txt deleted file mode 100644 index b0e42a5aaa5d7b5a06b6422a5c33a0047b6eff8d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/alexnet_imagenet/alexnet_imagenet.txt +++ /dev/null @@ -1,229 +0,0 @@ -2739.950736 -+++++ -conf1 1 1 56.3 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 relu fp32 1 -7 gpu mul fp32 11 add fp32 1 relu fp32 1 -8 gpu mul fp32 11 add fp32 1 -9 gpu softmax fp32 1 ------ -+++++ -conf2 1.802133644103582 1.8186433204507424 55.76 0.5399999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf3 1.7574572103878898 1.7673706184460103 55.58 0.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu 
mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf4 2.0227701930718065 2.043112495268932 55.42 0.8799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf5 1.9872634777043927 2.002789650227035 55.120000000000005 1.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf6 1.8204253918445088 1.843736069756362 54.84 1.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf7 1.9308336510645352 1.934889049414224 54.74 1.5599999999999952 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 
12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf8 2.0146435217865446 2.0367475358800102 54.58 1.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf9 2.0101709494490696 2.0329911158023064 54.400000000000006 1.8999999999999915 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf10 2.0052132441967916 2.0284931705407003 54.300000000000004 1.999999999999993 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf11 2.010827434817262 2.036001862538864 54.2 2.0999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu 
fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf12 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf13 1.9923471030291253 2.009177323959059 54.120000000000005 2.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf14 1.9923471030291253 2.009177323959059 54.120000000000005 2.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf15 2.028037341700216 2.049760395549724 54.0 2.299999999999997 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu 
fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf16 1.9910730364852436 2.006510848093771 53.54 2.759999999999998 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf17 2.1567475543719614 2.159142310265706 53.300000000000004 2.999999999999993 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf18 2.1567475543719614 2.159142310265706 53.300000000000004 2.999999999999993 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf19 2.0232690820426464 2.0527698121318476 53.300000000000004 2.999999999999993 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 
1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/lenet_keras/lenet_keras.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/lenet_keras/lenet_keras.txt deleted file mode 100644 index b4e51dff426f4d3c5cb7b9572e6aa5940212acbd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/lenet_keras/lenet_keras.txt +++ /dev/null @@ -1,409 +0,0 @@ -282.5141369999999 -+++++ -conf1 1 1 98.7 0.0 -1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp32 11 add fp32 1 tanh fp32 1 -4 gpu mul fp32 11 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf2 1.828613181003043 2.071721708828981 98.65 0.04999999999999716 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf3 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf4 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf5 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf6 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf7 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf8 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf9 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf10 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf11 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh 
fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf12 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf13 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf14 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf15 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf16 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf17 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 
12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf18 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf19 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf20 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf21 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf22 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf23 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf24 
1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf25 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf26 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf27 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf28 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf29 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf30 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf31 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf32 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf33 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf34 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf35 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf36 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 
tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf37 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf38 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf39 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf40 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf41 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf42 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 
12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf43 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf44 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf45 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf46 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf47 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf48 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf49 
1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf50 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf51 1.8534621507951072 2.1231113105788597 98.44000000000001 0.2599999999999909 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/mobilenet_cifar10/mobilenet_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/mobilenet_cifar10/mobilenet_cifar10.txt deleted file mode 100644 index b4d8bd893c8d9395fce6a3484d75f543f1e72da2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/mobilenet_cifar10/mobilenet_cifar10.txt +++ /dev/null @@ -1,3220 +0,0 @@ -4077.307063200001 -+++++ -conf1 1 1 84.42 0.0 -1 gpu conv fp32 11 -2 gpu batchnorm fp32 11 -3 gpu relu fp32 11 -4 gpu group_conv fp32 11 -5 gpu batchnorm fp32 11 -6 gpu relu fp32 11 -7 gpu conv fp32 11 -8 gpu batchnorm fp32 11 -9 gpu relu fp32 11 -10 gpu group_conv fp32 11 -11 gpu batchnorm fp32 11 -12 gpu relu fp32 11 -13 gpu conv fp32 11 -14 gpu batchnorm fp32 11 -15 gpu relu fp32 11 -16 gpu group_conv fp32 11 -17 gpu batchnorm fp32 11 -18 gpu relu fp32 11 -19 gpu conv fp32 11 -20 gpu batchnorm fp32 11 -21 gpu relu fp32 11 -22 gpu 
group_conv fp32 11 -23 gpu batchnorm fp32 11 -24 gpu relu fp32 11 -25 gpu conv fp32 11 -26 gpu batchnorm fp32 11 -27 gpu relu fp32 11 -28 gpu group_conv fp32 11 -29 gpu batchnorm fp32 11 -30 gpu relu fp32 11 -31 gpu conv fp32 11 -32 gpu batchnorm fp32 11 -33 gpu relu fp32 11 -34 gpu group_conv fp32 11 -35 gpu batchnorm fp32 11 -36 gpu relu fp32 11 -37 gpu conv fp32 11 -38 gpu batchnorm fp32 11 -39 gpu relu fp32 11 -40 gpu group_conv fp32 11 -41 gpu batchnorm fp32 11 -42 gpu relu fp32 11 -43 gpu conv fp32 11 -44 gpu batchnorm fp32 11 -45 gpu relu fp32 11 -46 gpu group_conv fp32 11 -47 gpu batchnorm fp32 11 -48 gpu relu fp32 11 -49 gpu conv fp32 11 -50 gpu batchnorm fp32 11 -51 gpu relu fp32 11 -52 gpu group_conv fp32 11 -53 gpu batchnorm fp32 11 -54 gpu relu fp32 11 -55 gpu conv fp32 11 -56 gpu batchnorm fp32 11 -57 gpu relu fp32 11 -58 gpu group_conv fp32 11 -59 gpu batchnorm fp32 11 -60 gpu relu fp32 11 -61 gpu conv fp32 11 -62 gpu batchnorm fp32 11 -63 gpu relu fp32 11 -64 gpu group_conv fp32 11 -65 gpu batchnorm fp32 11 -66 gpu relu fp32 11 -67 gpu conv fp32 11 -68 gpu batchnorm fp32 11 -69 gpu relu fp32 11 -70 gpu group_conv fp32 11 -71 gpu batchnorm fp32 11 -72 gpu relu fp32 11 -73 gpu conv fp32 11 -74 gpu batchnorm fp32 11 -75 gpu relu fp32 11 -76 gpu group_conv fp32 11 -77 gpu batchnorm fp32 11 -78 gpu relu fp32 11 -79 gpu conv fp32 11 -80 gpu batchnorm fp32 11 -81 gpu relu fp32 11 -82 gpu pool_mean fp32 11 -83 gpu mul fp32 11 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf2 1.4930855091460031 1.447990050940341 83.72 0.7000000000000028 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu 
conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv fp16 12 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf3 1.493397883226807 1.449591062426989 83.72 0.7000000000000028 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 
-15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 163 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf4 1.4934429016801338 1.4500582352111675 83.72 0.7000000000000028 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu 
group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 168 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf5 1.4938214813031556 1.450038222978811 83.72 0.7000000000000028 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu 
batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 157 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf6 1.4933879828131855 1.449975636202813 83.72 
0.7000000000000028 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 160 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu 
mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf7 1.492663093331302 1.4487067754520524 83.7 0.7199999999999989 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 
gpu conv perf_fp16 167 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf8 1.495724395088184 1.4507925552157772 83.56 0.8599999999999994 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm 
fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 162 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf9 1.496506307637598 1.4521705950285135 83.36 1.0600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu 
group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 162 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf10 1.496532672928805 1.4521696542076958 83.36 1.0600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm 
fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 156 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf11 1.4988418058849937 1.4555327556053628 83.28 1.1400000000000006 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 
gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 164 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf12 1.4994289979945077 1.4562439330251535 83.28 1.1400000000000006 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu 
batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf13 1.4952028793065038 1.450369851058777 83.14 1.2800000000000011 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 162 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu 
fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf14 1.4933978285280285 1.448265686258097 83.12 1.2999999999999972 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 
-47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf15 1.491958833559989 1.4459262032919467 83.08 1.3400000000000034 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu 
fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 157 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf16 1.4937317297990984 1.4498121856525021 83.02000000000001 1.3999999999999915 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu 
conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf17 1.4963413808686974 1.4522391736954623 82.86 1.5600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu 
batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 165 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf18 1.4942172827099065 1.4504631324933321 82.86 1.5600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 
12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 157 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf19 1.4963964073376739 1.4525461321361477 82.86 1.5600000000000023 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 
-23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf20 1.4932583049858652 1.4472547227714012 82.84 1.5799999999999983 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 
gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv samp_fp16 266 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf21 1.4964326545281064 1.4526263046333605 82.82000000000001 1.5999999999999943 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 
-13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf22 1.4966042483929347 1.4527859961226985 82.82000000000001 1.5999999999999943 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv 
fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf23 1.4966008974318024 1.4527415844509437 82.78 1.6400000000000006 -1 gpu conv fp16 12 -2 gpu 
batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 155 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 
------ -+++++ -conf24 1.4932738366973777 1.448820445466833 82.64 1.7800000000000011 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 164 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 157 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu 
batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf25 1.4940402684133964 1.447332235394843 82.48 1.9399999999999977 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 
gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv samp_fp16 261 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf26 1.4981764588414919 1.4530714150549078 82.39999999999999 2.0200000000000102 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 161 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv 
fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf27 1.5004334658773033 1.4549115105608688 82.3 2.1200000000000045 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 
12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 156 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf28 1.5006808163336343 1.4553824345285296 82.3 2.1200000000000045 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 
gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf29 1.4999870719460484 1.4571625511374704 82.28 2.1400000000000006 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 
gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 165 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf30 1.500042366879961 1.4574715946270216 82.28 2.1400000000000006 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu 
relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf31 1.500214789632402 1.4576323532660131 82.28 2.1400000000000006 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 163 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu 
group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 164 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 151 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf32 1.4927009086066445 1.4484049211953174 82.26 2.1599999999999966 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 164 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu 
batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 161 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 156 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf33 1.5003438014588875 1.4538240352408085 82.22 2.200000000000003 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 
gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 152 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf34 1.5041587978616728 1.4610492456195174 82.02000000000001 2.3999999999999915 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 
-31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 161 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 152 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 158 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf35 1.5000040131742656 1.4555601139156464 81.88 2.5400000000000063 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv fp16 12 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 
-26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv perf_fp16 152 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 12 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv perf_fp16 161 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 151 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 151 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 167 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf36 1.4950571524902583 1.451478376045808 81.84 2.5799999999999983 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 -16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 164 -20 gpu batchnorm 
fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv perf_fp16 161 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 161 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 155 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ -+++++ -conf37 1.4975271575548847 1.4532126224638244 81.44 2.980000000000004 -1 gpu conv fp16 12 -2 gpu batchnorm fp16 12 -3 gpu relu fp16 12 -4 gpu group_conv fp16 12 -5 gpu batchnorm fp16 12 -6 gpu relu fp16 12 -7 gpu conv fp16 12 -8 gpu batchnorm fp16 12 -9 gpu relu fp16 12 -10 gpu group_conv fp16 12 -11 gpu batchnorm fp16 12 -12 gpu relu fp16 12 -13 gpu conv fp16 12 -14 gpu batchnorm fp16 12 -15 gpu relu fp16 12 
-16 gpu group_conv fp16 12 -17 gpu batchnorm fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 164 -20 gpu batchnorm fp16 12 -21 gpu relu fp16 12 -22 gpu group_conv fp16 12 -23 gpu batchnorm fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu group_conv fp16 12 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 -32 gpu batchnorm fp16 12 -33 gpu relu fp16 12 -34 gpu group_conv fp16 12 -35 gpu batchnorm fp16 12 -36 gpu relu fp16 12 -37 gpu conv fp16 11 -38 gpu batchnorm fp16 12 -39 gpu relu fp16 12 -40 gpu group_conv fp16 12 -41 gpu batchnorm fp16 12 -42 gpu relu fp16 12 -43 gpu conv fp16 12 -44 gpu batchnorm fp16 12 -45 gpu relu fp16 12 -46 gpu group_conv fp16 12 -47 gpu batchnorm fp16 12 -48 gpu relu fp16 12 -49 gpu conv perf_fp16 155 -50 gpu batchnorm fp16 12 -51 gpu relu fp16 12 -52 gpu group_conv fp16 12 -53 gpu batchnorm fp16 12 -54 gpu relu fp16 12 -55 gpu conv perf_fp16 155 -56 gpu batchnorm fp16 12 -57 gpu relu fp16 12 -58 gpu group_conv fp16 12 -59 gpu batchnorm fp16 12 -60 gpu relu fp16 12 -61 gpu conv perf_fp16 151 -62 gpu batchnorm fp16 12 -63 gpu relu fp16 12 -64 gpu group_conv fp16 12 -65 gpu batchnorm fp16 12 -66 gpu relu fp16 12 -67 gpu conv perf_fp16 155 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu group_conv fp16 12 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv perf_fp16 152 -74 gpu batchnorm fp16 12 -75 gpu relu fp16 12 -76 gpu group_conv fp16 12 -77 gpu batchnorm fp16 12 -78 gpu relu fp16 12 -79 gpu conv perf_fp16 153 -80 gpu batchnorm fp16 12 -81 gpu relu fp16 12 -82 gpu pool_mean fp16 12 -83 gpu mul fp16 12 add fp16 1 -84 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet18_cifar10/resnet18_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet18_cifar10/resnet18_cifar10.txt deleted file mode 100644 index 
654cffbf632686dca6310a93ecf56b6521e32039..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet18_cifar10/resnet18_cifar10.txt +++ /dev/null @@ -1,2296 +0,0 @@ -2484.981244 -+++++ -conf1 1 1 89.56 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 -3 gpu conv fp32 11 add fp32 1 -4 gpu add fp32 11 -5 gpu relu fp32 11 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 -8 gpu add fp32 11 -9 gpu relu fp32 11 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 -11 gpu conv fp32 11 add fp32 1 -12 gpu add fp32 11 -13 gpu relu fp32 11 -14 gpu conv fp32 11 add fp32 1 relu fp32 1 -15 gpu conv fp32 11 add fp32 1 -16 gpu conv fp32 11 add fp32 1 -17 gpu add fp32 11 -18 gpu relu fp32 11 -19 gpu conv fp32 11 add fp32 1 relu fp32 1 -20 gpu conv fp32 11 add fp32 1 -21 gpu add fp32 11 -22 gpu relu fp32 11 -23 gpu conv fp32 11 add fp32 1 relu fp32 1 -24 gpu conv fp32 11 add fp32 1 -25 gpu add fp32 11 -26 gpu relu fp32 11 -27 gpu conv fp32 11 add fp32 1 relu fp32 1 -28 gpu conv fp32 11 add fp32 1 -29 gpu conv fp32 11 add fp32 1 -30 gpu add fp32 11 -31 gpu relu fp32 11 -32 gpu conv fp32 11 add fp32 1 relu fp32 1 -33 gpu conv fp32 11 add fp32 1 -34 gpu add fp32 11 -35 gpu relu fp32 11 -36 gpu conv fp32 11 add fp32 1 relu fp32 1 -37 gpu conv fp32 11 add fp32 1 -38 gpu add fp32 11 -39 gpu relu fp32 11 -40 gpu pool_mean fp32 11 -41 gpu mul fp32 11 add fp32 1 -42 gpu softmax fp32 1 ------ -+++++ -conf2 1.767527790869615 1.7962938589450996 88.96 0.6000000000000085 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 
gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 155 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf3 1.7676486174436143 1.7967155014984917 88.78 0.7800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 155 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu 
add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf4 1.7674352647250422 1.792910560846682 88.7 0.8599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf5 1.8655703338511067 1.8930089896922888 88.53999999999999 1.0200000000000102 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 167 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 
154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 159 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 157 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf6 1.9070428103729684 1.9172857853336078 88.38000000000001 1.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv samp_fp16 266 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 152 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv samp_fp16 261 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu 
conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf7 1.769778590701739 1.7956222622694236 88.24 1.3200000000000074 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv fp16 12 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv samp_fp16 268 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf8 1.841404652091802 1.8677947628418006 88.24 1.3200000000000074 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu 
conv perf_fp16 168 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 162 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf9 1.8679349428783487 1.8995927920729931 88.22 1.3400000000000034 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 160 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 161 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu 
add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf10 1.876937310100899 1.9041581451399825 88.1 1.460000000000008 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf11 1.842140004857965 1.8673692956620238 88.06 1.5 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 
12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 167 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf12 1.9070567138857761 1.9165525910492667 88.02 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv samp_fp16 266 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 261 add fp16 1 -21 gpu add fp16 12 -22 gpu relu 
fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 152 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf13 1.9185835698271805 1.9328202469403 87.98 1.5799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv samp_fp16 266 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 152 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 152 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ 
-conf14 1.781744853993609 1.8082995958456516 87.92 1.6400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 168 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 159 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv samp_fp16 265 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv samp_fp16 268 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf15 1.9185835698271805 1.9328202469403 87.92 1.6400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv samp_fp16 266 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 
-18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 152 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 152 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 12 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf16 1.875261840315855 1.8986912653657988 87.88 1.6800000000000068 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 159 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 12 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 
-38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf17 1.9013559086026153 1.9230901214481015 87.86 1.7000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf18 1.9185835698271805 1.9328202469403 87.83999999999999 1.720000000000013 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 
gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv samp_fp16 266 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 152 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 152 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf19 1.8770503055325798 1.9007923328014182 87.82 1.740000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 151 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv 
fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf20 1.8774136276932418 1.90365663123621 87.82 1.740000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf21 1.943143041264842 1.9591958561422729 87.82 1.740000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 
12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf22 1.870789918969847 1.8863625217899933 87.8 1.7600000000000051 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 264 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu 
conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf23 1.7445941809066292 1.7754934270309912 87.78 1.7800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 155 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv perf_fp16 166 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf24 1.9065930313550916 1.928938946228637 87.78 1.7800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu 
fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 167 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf25 1.9021824494907031 1.9237134505552098 87.78 1.7800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 
-25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 154 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf26 1.9017271009017505 1.9211078231701697 87.78 1.7800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf27 1.8187224917656395 1.820406007609536 87.76 1.7999999999999972 -1 gpu conv fp16 12 
add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv samp_fp16 264 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf28 1.9070855899343322 1.9285210655709735 87.76 1.7999999999999972 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add 
fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv samp_fp16 268 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf29 1.9013559086026153 1.9230901214481015 87.74 1.8200000000000074 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 
-42 gpu softmax fp32 1 ------ -+++++ -conf30 1.8772990284718367 1.9022146647342513 87.72 1.8400000000000034 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf31 1.9013559086026153 1.9230901214481015 87.68 1.8799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add 
fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf32 1.9020502478364545 1.923319572598976 87.66000000000001 1.8999999999999915 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu 
fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf33 1.7516394053514481 1.7809034526471939 87.62 1.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 155 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv perf_fp16 166 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf34 1.7814953252955337 1.8122658147993431 87.62 1.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 162 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 
-12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv perf_fp16 160 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 155 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv fp16 12 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 160 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv perf_fp16 166 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 155 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf35 1.887538247557846 1.9103369445911678 87.62 1.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 158 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 
add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf36 1.9107566783735581 1.9273803227885578 87.6 1.960000000000008 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 157 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf37 1.9013559086026153 1.9230901214481015 87.58 1.980000000000004 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 
-8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 12 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf38 1.8984089819969947 1.9195632881772446 87.58 1.980000000000004 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu 
conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf39 1.9020502478364545 1.923319572598976 87.52 2.0400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf40 1.9020502478364545 1.923319572598976 87.52 2.0400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 
-4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf41 1.9013559086026153 1.9230901214481015 87.5 2.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 
-24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf42 1.9013559086026153 1.9230901214481015 87.46000000000001 2.0999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv fp16 11 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf43 1.9196179152539186 1.9443459719929068 87.44 
2.1200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 153 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf44 1.9020502478364545 1.923319572598976 87.4 2.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu 
fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf45 1.9152817031040366 1.9357432559063958 87.4 2.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu 
pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf46 1.915754791147898 1.9373322475753219 87.4 2.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf47 1.9130551004051772 1.9409232417921056 87.38 2.180000000000007 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv 
fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv perf_fp16 153 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf48 1.9421147660673033 1.9584555432766413 87.38 2.180000000000007 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 
-36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf49 1.9052849920081363 1.9300100333661123 87.32 2.240000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 153 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf50 1.9154322863033566 1.934908329027621 87.3 2.260000000000005 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu 
conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv perf_fp16 151 add fp16 1 -30 gpu add fp16 12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ -+++++ -conf51 1.9079703554020564 1.9287218218306195 86.96000000000001 2.5999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 -3 gpu conv fp16 12 add fp16 1 -4 gpu add fp16 12 -5 gpu relu fp16 12 -6 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 161 add fp16 1 -8 gpu add fp16 12 -9 gpu relu fp16 12 -10 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 -12 gpu add fp16 12 -13 gpu relu fp16 12 -14 gpu conv fp16 12 add fp16 1 relu fp16 1 -15 gpu conv fp16 12 add fp16 1 -16 gpu conv fp16 11 add fp16 1 -17 gpu add fp16 12 -18 gpu relu fp16 12 -19 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -20 gpu conv samp_fp16 262 add fp16 1 -21 gpu add fp16 12 -22 gpu relu fp16 12 -23 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 -24 gpu conv perf_fp16 153 add fp16 1 -25 gpu add fp16 12 -26 gpu relu fp16 12 -27 gpu conv fp16 12 add fp16 1 relu fp16 1 -28 gpu conv fp16 12 add fp16 1 -29 gpu conv samp_fp16 261 add fp16 1 -30 gpu add fp16 
12 -31 gpu relu fp16 12 -32 gpu conv fp16 12 add fp16 1 relu fp16 1 -33 gpu conv fp16 12 add fp16 1 -34 gpu add fp16 12 -35 gpu relu fp16 12 -36 gpu conv fp16 12 add fp16 1 relu fp16 1 -37 gpu conv perf_fp16 152 add fp16 1 -38 gpu add fp16 12 -39 gpu relu fp16 12 -40 gpu pool_mean fp16 12 -41 gpu mul fp16 12 add fp16 1 -42 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet50_imagenet/resnet50_imagenet.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet50_imagenet/resnet50_imagenet.txt deleted file mode 100644 index 094eed413b520f9dd661797b96735438861d1c08..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/resnet50_imagenet/resnet50_imagenet.txt +++ /dev/null @@ -1,1057 +0,0 @@ -7161.053769000008 -+++++ -conf1 1 1 75.7 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu batchnorm fp32 11 -3 gpu conv fp32 11 add fp32 1 -4 gpu batchnorm fp32 11 -5 gpu relu fp32 11 -6 gpu conv fp32 11 add fp32 1 -7 gpu batchnorm fp32 11 -8 gpu relu fp32 11 -9 gpu conv fp32 11 add fp32 1 -10 gpu batchnorm fp32 11 -11 gpu conv fp32 11 add fp32 1 -12 gpu batchnorm fp32 11 -13 gpu add fp32 11 -14 gpu relu fp32 11 -15 gpu conv fp32 11 add fp32 1 -16 gpu batchnorm fp32 11 -17 gpu relu fp32 11 -18 gpu conv fp32 11 add fp32 1 -19 gpu batchnorm fp32 11 -20 gpu relu fp32 11 -21 gpu conv fp32 11 add fp32 1 -22 gpu batchnorm fp32 11 -23 gpu add fp32 11 -24 gpu relu fp32 11 -25 gpu conv fp32 11 add fp32 1 -26 gpu batchnorm fp32 11 -27 gpu relu fp32 11 -28 gpu conv fp32 11 add fp32 1 -29 gpu batchnorm fp32 11 -30 gpu relu fp32 11 -31 gpu conv fp32 11 add fp32 1 -32 gpu batchnorm fp32 11 -33 gpu add fp32 11 -34 gpu relu fp32 11 -35 gpu conv fp32 11 add fp32 1 -36 gpu batchnorm fp32 11 -37 gpu relu fp32 11 -38 gpu conv fp32 11 add fp32 1 -39 gpu batchnorm fp32 11 -40 gpu relu fp32 11 -41 gpu conv fp32 11 add fp32 1 -42 gpu 
batchnorm fp32 11 -43 gpu conv fp32 11 add fp32 1 -44 gpu batchnorm fp32 11 -45 gpu add fp32 11 -46 gpu relu fp32 11 -47 gpu conv fp32 11 add fp32 1 -48 gpu batchnorm fp32 11 -49 gpu relu fp32 11 -50 gpu conv fp32 11 add fp32 1 -51 gpu batchnorm fp32 11 -52 gpu relu fp32 11 -53 gpu conv fp32 11 add fp32 1 -54 gpu batchnorm fp32 11 -55 gpu add fp32 11 -56 gpu relu fp32 11 -57 gpu conv fp32 11 add fp32 1 -58 gpu batchnorm fp32 11 -59 gpu relu fp32 11 -60 gpu conv fp32 11 add fp32 1 -61 gpu batchnorm fp32 11 -62 gpu relu fp32 11 -63 gpu conv fp32 11 add fp32 1 -64 gpu batchnorm fp32 11 -65 gpu add fp32 11 -66 gpu relu fp32 11 -67 gpu conv fp32 11 add fp32 1 -68 gpu batchnorm fp32 11 -69 gpu relu fp32 11 -70 gpu conv fp32 11 add fp32 1 -71 gpu batchnorm fp32 11 -72 gpu relu fp32 11 -73 gpu conv fp32 11 add fp32 1 -74 gpu batchnorm fp32 11 -75 gpu add fp32 11 -76 gpu relu fp32 11 -77 gpu conv fp32 11 add fp32 1 -78 gpu batchnorm fp32 11 -79 gpu relu fp32 11 -80 gpu conv fp32 11 add fp32 1 -81 gpu batchnorm fp32 11 -82 gpu relu fp32 11 -83 gpu conv fp32 11 add fp32 1 -84 gpu batchnorm fp32 11 -85 gpu conv fp32 11 add fp32 1 -86 gpu batchnorm fp32 11 -87 gpu add fp32 11 -88 gpu relu fp32 11 -89 gpu conv fp32 11 add fp32 1 -90 gpu batchnorm fp32 11 -91 gpu relu fp32 11 -92 gpu conv fp32 11 add fp32 1 -93 gpu batchnorm fp32 11 -94 gpu relu fp32 11 -95 gpu conv fp32 11 add fp32 1 -96 gpu batchnorm fp32 11 -97 gpu add fp32 11 -98 gpu relu fp32 11 -99 gpu conv fp32 11 add fp32 1 -100 gpu batchnorm fp32 11 -101 gpu relu fp32 11 -102 gpu conv fp32 11 add fp32 1 -103 gpu batchnorm fp32 11 -104 gpu relu fp32 11 -105 gpu conv fp32 11 add fp32 1 -106 gpu batchnorm fp32 11 -107 gpu add fp32 11 -108 gpu relu fp32 11 -109 gpu conv fp32 11 add fp32 1 -110 gpu batchnorm fp32 11 -111 gpu relu fp32 11 -112 gpu conv fp32 11 add fp32 1 -113 gpu batchnorm fp32 11 -114 gpu relu fp32 11 -115 gpu conv fp32 11 add fp32 1 -116 gpu batchnorm fp32 11 -117 gpu add fp32 11 -118 gpu relu fp32 11 -119 
gpu conv fp32 11 add fp32 1 -120 gpu batchnorm fp32 11 -121 gpu relu fp32 11 -122 gpu conv fp32 11 add fp32 1 -123 gpu batchnorm fp32 11 -124 gpu relu fp32 11 -125 gpu conv fp32 11 add fp32 1 -126 gpu batchnorm fp32 11 -127 gpu add fp32 11 -128 gpu relu fp32 11 -129 gpu conv fp32 11 add fp32 1 -130 gpu batchnorm fp32 11 -131 gpu relu fp32 11 -132 gpu conv fp32 11 add fp32 1 -133 gpu batchnorm fp32 11 -134 gpu relu fp32 11 -135 gpu conv fp32 11 add fp32 1 -136 gpu batchnorm fp32 11 -137 gpu add fp32 11 -138 gpu relu fp32 11 -139 gpu conv fp32 11 add fp32 1 -140 gpu batchnorm fp32 11 -141 gpu relu fp32 11 -142 gpu conv fp32 11 add fp32 1 -143 gpu batchnorm fp32 11 -144 gpu relu fp32 11 -145 gpu conv fp32 11 add fp32 1 -146 gpu batchnorm fp32 11 -147 gpu conv fp32 11 add fp32 1 -148 gpu batchnorm fp32 11 -149 gpu add fp32 11 -150 gpu relu fp32 11 -151 gpu conv fp32 11 add fp32 1 -152 gpu batchnorm fp32 11 -153 gpu relu fp32 11 -154 gpu conv fp32 11 add fp32 1 -155 gpu batchnorm fp32 11 -156 gpu relu fp32 11 -157 gpu conv fp32 11 add fp32 1 -158 gpu batchnorm fp32 11 -159 gpu add fp32 11 -160 gpu relu fp32 11 -161 gpu conv fp32 11 add fp32 1 -162 gpu batchnorm fp32 11 -163 gpu relu fp32 11 -164 gpu conv fp32 11 add fp32 1 -165 gpu batchnorm fp32 11 -166 gpu relu fp32 11 -167 gpu conv fp32 11 add fp32 1 -168 gpu batchnorm fp32 11 -169 gpu add fp32 11 -170 gpu relu fp32 11 -171 gpu pool_max fp32 11 -172 gpu mul fp32 11 add fp32 1 -173 gpu softmax fp32 1 ------ -+++++ -conf2 1.8254789092281507 1.4527803526239977 75.7 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu 
relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu 
fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 
gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf3 1.8254789092281507 1.4527803526239977 75.7 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 
12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv 
fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf4 1.8254789092281507 1.4527803526239977 75.7 0.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu 
batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv fp16 12 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 
gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv fp16 12 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf5 1.8323072136026506 1.457112696128105 74.76 0.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 
-19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 
gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv perf_fp16 157 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv fp16 12 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv perf_fp16 152 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 
12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ -+++++ -conf6 1.8333922701839533 1.4589203187717397 74.53999999999999 1.1600000000000108 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu batchnorm fp16 12 -3 gpu conv fp16 12 add fp16 1 -4 gpu batchnorm fp16 12 -5 gpu relu fp16 12 -6 gpu conv fp16 12 add fp16 1 -7 gpu batchnorm fp16 12 -8 gpu relu fp16 12 -9 gpu conv fp16 12 add fp16 1 -10 gpu batchnorm fp16 12 -11 gpu conv fp16 12 add fp16 1 -12 gpu batchnorm fp16 12 -13 gpu add fp16 12 -14 gpu relu fp16 12 -15 gpu conv fp16 12 add fp16 1 -16 gpu batchnorm fp16 12 -17 gpu relu fp16 12 -18 gpu conv fp16 12 add fp16 1 -19 gpu batchnorm fp16 12 -20 gpu relu fp16 12 -21 gpu conv fp16 12 add fp16 1 -22 gpu batchnorm fp16 12 -23 gpu add fp16 12 -24 gpu relu fp16 12 -25 gpu conv fp16 12 add fp16 1 -26 gpu batchnorm fp16 12 -27 gpu relu fp16 12 -28 gpu conv fp16 12 add fp16 1 -29 gpu batchnorm fp16 12 -30 gpu relu fp16 12 -31 gpu conv fp16 12 add fp16 1 -32 gpu batchnorm fp16 12 -33 gpu add fp16 12 -34 gpu relu fp16 12 -35 gpu conv fp16 12 add fp16 1 -36 gpu batchnorm fp16 12 -37 gpu relu fp16 12 -38 gpu conv fp16 12 add fp16 1 -39 gpu batchnorm fp16 12 -40 gpu relu fp16 12 -41 gpu conv fp16 12 add fp16 1 -42 gpu batchnorm fp16 12 -43 gpu conv fp16 12 add fp16 1 -44 gpu batchnorm fp16 12 -45 gpu add fp16 12 -46 gpu relu fp16 12 -47 gpu conv fp16 12 add fp16 1 -48 gpu batchnorm fp16 12 -49 gpu relu fp16 12 -50 gpu conv fp16 12 add fp16 1 -51 gpu batchnorm fp16 12 -52 gpu relu fp16 12 -53 gpu conv fp16 12 add fp16 1 -54 gpu batchnorm fp16 12 -55 gpu add fp16 12 -56 gpu relu fp16 12 -57 gpu conv fp16 12 add fp16 1 -58 gpu batchnorm fp16 12 -59 gpu relu fp16 12 -60 gpu conv fp16 12 add fp16 1 -61 gpu batchnorm fp16 12 -62 gpu relu fp16 12 -63 gpu conv fp16 12 add fp16 1 -64 gpu batchnorm fp16 12 -65 gpu add fp16 12 -66 gpu relu fp16 12 -67 gpu conv fp16 12 add fp16 1 -68 gpu batchnorm fp16 12 -69 gpu relu fp16 12 -70 
gpu conv fp16 12 add fp16 1 -71 gpu batchnorm fp16 12 -72 gpu relu fp16 12 -73 gpu conv fp16 12 add fp16 1 -74 gpu batchnorm fp16 12 -75 gpu add fp16 12 -76 gpu relu fp16 12 -77 gpu conv fp16 12 add fp16 1 -78 gpu batchnorm fp16 12 -79 gpu relu fp16 12 -80 gpu conv fp16 12 add fp16 1 -81 gpu batchnorm fp16 12 -82 gpu relu fp16 12 -83 gpu conv fp16 12 add fp16 1 -84 gpu batchnorm fp16 12 -85 gpu conv fp16 12 add fp16 1 -86 gpu batchnorm fp16 12 -87 gpu add fp16 12 -88 gpu relu fp16 12 -89 gpu conv fp16 12 add fp16 1 -90 gpu batchnorm fp16 12 -91 gpu relu fp16 12 -92 gpu conv fp16 12 add fp16 1 -93 gpu batchnorm fp16 12 -94 gpu relu fp16 12 -95 gpu conv fp16 12 add fp16 1 -96 gpu batchnorm fp16 12 -97 gpu add fp16 12 -98 gpu relu fp16 12 -99 gpu conv perf_fp16 157 add fp16 1 -100 gpu batchnorm fp16 12 -101 gpu relu fp16 12 -102 gpu conv samp_fp16 267 add fp16 1 -103 gpu batchnorm fp16 12 -104 gpu relu fp16 12 -105 gpu conv fp16 12 add fp16 1 -106 gpu batchnorm fp16 12 -107 gpu add fp16 12 -108 gpu relu fp16 12 -109 gpu conv fp16 12 add fp16 1 -110 gpu batchnorm fp16 12 -111 gpu relu fp16 12 -112 gpu conv fp16 12 add fp16 1 -113 gpu batchnorm fp16 12 -114 gpu relu fp16 12 -115 gpu conv fp16 12 add fp16 1 -116 gpu batchnorm fp16 12 -117 gpu add fp16 12 -118 gpu relu fp16 12 -119 gpu conv fp16 12 add fp16 1 -120 gpu batchnorm fp16 12 -121 gpu relu fp16 12 -122 gpu conv fp16 12 add fp16 1 -123 gpu batchnorm fp16 12 -124 gpu relu fp16 12 -125 gpu conv fp16 12 add fp16 1 -126 gpu batchnorm fp16 12 -127 gpu add fp16 12 -128 gpu relu fp16 12 -129 gpu conv fp16 12 add fp16 1 -130 gpu batchnorm fp16 12 -131 gpu relu fp16 12 -132 gpu conv fp16 12 add fp16 1 -133 gpu batchnorm fp16 12 -134 gpu relu fp16 12 -135 gpu conv fp16 12 add fp16 1 -136 gpu batchnorm fp16 12 -137 gpu add fp16 12 -138 gpu relu fp16 12 -139 gpu conv fp16 12 add fp16 1 -140 gpu batchnorm fp16 12 -141 gpu relu fp16 12 -142 gpu conv fp16 12 add fp16 1 -143 gpu batchnorm fp16 12 -144 gpu relu fp16 12 -145 gpu 
conv fp16 12 add fp16 1 -146 gpu batchnorm fp16 12 -147 gpu conv fp16 12 add fp16 1 -148 gpu batchnorm fp16 12 -149 gpu add fp16 12 -150 gpu relu fp16 12 -151 gpu conv fp16 12 add fp16 1 -152 gpu batchnorm fp16 12 -153 gpu relu fp16 12 -154 gpu conv fp16 12 add fp16 1 -155 gpu batchnorm fp16 12 -156 gpu relu fp16 12 -157 gpu conv fp16 12 add fp16 1 -158 gpu batchnorm fp16 12 -159 gpu add fp16 12 -160 gpu relu fp16 12 -161 gpu conv fp16 12 add fp16 1 -162 gpu batchnorm fp16 12 -163 gpu relu fp16 12 -164 gpu conv perf_fp16 152 add fp16 1 -165 gpu batchnorm fp16 12 -166 gpu relu fp16 12 -167 gpu conv fp16 12 add fp16 1 -168 gpu batchnorm fp16 12 -169 gpu add fp16 12 -170 gpu relu fp16 12 -171 gpu pool_max fp16 12 -172 gpu mul fp16 12 add fp16 1 -173 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar10/vgg16_cifar10.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar10/vgg16_cifar10.txt deleted file mode 100644 index 2b325a9fe2d122e74cdd2b80e2768e68591313bf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar10/vgg16_cifar10.txt +++ /dev/null @@ -1,913 +0,0 @@ -3776.508929999999 -+++++ -conf1 1 1 89.96 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu 
mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.1225958306417145 1.9771056444390926 89.91 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf3 2.090180991844805 1.9532689756636086 89.82 0.14000000000000057 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf4 2.169931036393396 2.0048851858669283 89.53999999999999 0.4200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max 
fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf5 2.1012179398201756 1.9325098819632314 89.42 0.539999999999992 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf6 2.2313002482945326 2.069581185407626 89.38000000000001 0.5799999999999841 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 158 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 
relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf7 2.143061101834193 1.9675759235961738 89.3 0.6599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf8 2.199379444387758 2.0314348091429677 89.2 0.7599999999999909 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -12 gpu 
conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf9 2.3236298452294624 2.156907976575644 89.03999999999999 0.9200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf10 2.3224369486241603 2.1560351277882046 89.03999999999999 0.9200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ 
-conf11 2.358467412507993 2.1904290636262784 89.02 0.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf12 2.3633503986583126 2.1980949050120437 88.88000000000001 1.079999999999984 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf13 2.4903388172036043 2.3063593441573564 88.82 1.1400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 
1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf14 2.508156996742662 2.3204109539869595 88.78 1.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf15 2.4818531813049622 2.2910866330696744 88.75999999999999 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max 
fp16 1 -8 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf16 2.4591564896606 2.272664410995804 88.74 1.2199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf17 2.5370582721089496 2.3464665753522405 88.72 1.2399999999999949 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv 
samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf18 2.438100014978735 2.257620696759345 88.7 1.259999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf19 2.4776935382337006 2.2949598026093168 88.7 1.259999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf20 
2.4380041604279596 2.254330054479329 88.68 1.279999999999987 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf21 2.4745444350223327 2.2883888475386525 88.64 1.3199999999999932 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf22 2.4136652022060625 2.2360545757445407 88.52 1.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv 
samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf23 2.510093966915115 2.316437144001897 88.52 1.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf24 2.475990790728594 2.28127562431577 88.5 1.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 
add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf25 2.4761929121466926 2.290365501363375 88.5 1.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf26 2.4763575559033875 2.291312348847263 88.5 1.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 
-13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf27 2.600249602991055 2.4123747341424644 88.06 1.8999999999999915 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf28 2.596077615026303 2.4115375655840245 88.02 1.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf29 2.580888020555937 
2.3840829703999833 87.88 2.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf30 2.556352783745439 2.3641413704751537 87.8 2.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf31 2.5559756082494527 2.3677471703724575 87.78 2.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 
262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 11 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf32 2.597413373332546 2.4091972878097585 87.76 2.1999999999999886 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf33 2.4797467027434656 2.2874608793842612 87.74 2.219999999999999 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv 
perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf34 2.593675604602072 2.400513932866452 87.7 2.259999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf35 2.6300759173431336 2.432687374579977 87.62 2.339999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -12 gpu conv 
samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf36 2.5907083037103864 2.4042762580264356 87.6 2.3599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf37 2.6143261650366187 2.423427684623993 87.6 2.3599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ 
-conf38 2.6144436259117203 2.4231961521843344 87.6 2.3599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf39 2.662088796913144 2.4660859696742032 87.6 2.3599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf40 2.6210428708834517 2.423389791646294 87.58 2.3799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 
-4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf41 2.6399924349243533 2.4443864221157914 87.58 2.3799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf42 2.616443708384916 2.4217582570150697 87.58 2.3799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 
pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf43 2.6883473596205225 2.5036952786284137 87.5 2.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf44 2.6117356623585875 2.420771216556161 87.48 2.4799999999999898 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 151 add 
fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf45 2.6359174040106708 2.444231592562593 87.48 2.4799999999999898 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf46 2.56504192294198 2.371871906722655 87.44 2.519999999999996 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax 
fp32 1 ------ -+++++ -conf47 2.5652588453899727 2.3816996471861174 87.44 2.519999999999996 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf48 2.68806951500876 2.5007647690311425 87.14 2.819999999999993 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar100/vgg16_cifar100.txt 
b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar100/vgg16_cifar100.txt deleted file mode 100644 index 2c29bedd096aec2c7f66afbe729353e372fac403..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_cifar100/vgg16_cifar100.txt +++ /dev/null @@ -1,970 +0,0 @@ -3768.819777999999 -+++++ -conf1 1 1 66.5 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 -16 gpu softmax fp32 1 ------ -+++++ -conf2 2.2877724452131787 2.08025704453875 66.45 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 
add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf3 2.5314658805383816 2.30737681453141 66.45 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf4 2.044123178914057 1.8616966918258782 66.32000000000001 0.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf5 2.231179358259141 2.0317825813373864 66.18 0.3199999999999932 -1 gpu conv fp16 11 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 
add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf6 2.2474834421641057 2.0338639876373272 65.88000000000001 0.6199999999999903 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf7 2.22281439516094 2.0205460706906377 65.88000000000001 0.6199999999999903 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 268 add fp16 1 relu 
fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -12 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf8 2.1625085012968484 1.94560449637282 65.88000000000001 0.6199999999999903 -1 gpu conv fp16 11 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv fp16 11 add fp16 1 relu fp16 1 -10 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf9 2.639337323402163 2.3960416499256825 65.8 0.7000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu 
fp16 1 -12 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf10 2.672718090670276 2.4276905528801507 65.68 0.8199999999999932 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf11 2.699089631751789 2.446114054498494 65.68 0.8199999999999932 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 
------ -+++++ -conf12 2.6003752638648767 2.3553067802112344 65.64 0.8599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf13 2.638763904718665 2.395072565223988 65.64 0.8599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf14 2.6003752638648767 2.3553067802112344 65.64 0.8599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add 
fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf15 2.6003752638648767 2.3553067802112344 65.64 0.8599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf16 2.6732183804279006 2.4287517162140326 65.62 0.8799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 
pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf17 2.6728394017929027 2.428768169588016 65.60000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf18 2.4549989178389238 2.2406620346549433 65.56 0.9399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 
add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf19 2.673556689244081 2.429092581627209 65.52 0.980000000000004 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf20 2.6525635304451756 2.406830663552284 65.5 1.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 
------ -+++++ -conf21 2.6692288605087553 2.423462800937785 65.5 1.0 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf22 2.583650505571873 2.3471533059252194 65.48 1.019999999999996 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf23 2.6474572655420125 2.400471260394867 65.48 1.019999999999996 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu 
fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf24 2.4710116424304736 2.2555966923178996 65.46 1.0400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf25 2.557911102074785 2.3292661683311526 65.46 1.0400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 
pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf26 2.6032957018479532 2.367574146141511 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf27 2.6029968728098916 2.3672068592437223 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 164 add fp16 1 
relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf28 2.602540311129756 2.3691028781436954 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf29 2.602756708588441 2.3708111025211718 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax 
fp32 1 ------ -+++++ -conf30 2.603240857443844 2.3662875785790183 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf31 2.602882717372841 2.368011704225619 65.44 1.0600000000000023 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf32 2.67999343314603 2.4305182001043826 65.4 1.0999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 
262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf33 2.670314990364046 2.4275308713267485 65.38000000000001 1.1199999999999903 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf34 2.650982630033638 2.405821467700663 65.36 1.1400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 
262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf35 2.6507266317871756 2.405938171802741 65.36 1.1400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf36 2.6523068534836174 2.406695716686769 65.34 1.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv 
samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf37 2.6533198495191073 2.4077689394073865 65.34 1.1599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf38 2.64630900155657 2.4073892305914986 65.32 1.1800000000000068 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add 
fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf39 2.6725522534379413 2.42903505877629 65.32 1.1800000000000068 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf40 2.6435249267602225 2.403536258709464 65.3 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 161 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf41 2.6442059720503557 2.4037376163252024 65.3 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 
-3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf42 2.6536933126724027 2.4077527693156053 65.3 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf43 2.6442798101298948 2.4056031584129225 65.3 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv 
samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf44 2.603921271336049 2.3665955131107683 65.28 1.2199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf45 2.4967248028856828 2.2748997625822716 65.25999999999999 1.240000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 
pool_max fp16 1 -11 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf46 2.4963953691980665 2.2764932409573166 65.25999999999999 1.240000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf47 2.678944927989822 2.4251978482969956 65.24 1.2600000000000051 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 264 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add 
fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf48 2.6727135417173904 2.428897140422096 65.22 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf49 2.600256135586627 2.355428067042657 65.16 1.3400000000000034 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv fp16 11 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf50 2.264460006128871 2.058037581586567 64.9 1.5999999999999943 -1 gpu conv fp16 11 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 
add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 263 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ -+++++ -conf51 2.2817447204106736 2.0758846029697513 64.84 1.6599999999999966 -1 gpu conv fp16 11 add fp16 1 relu fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -9 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 265 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 -16 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_imagenet/vgg16_imagenet.txt b/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_imagenet/vgg16_imagenet.txt deleted file mode 100644 index 108a101c810f4ebe488e6f2029be4d970d7869a2..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/predictive/vgg16_imagenet/vgg16_imagenet.txt +++ /dev/null @@ -1,561 +0,0 @@ -19194.623482 -+++++ -conf1 1 1 72.84 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 -6 gpu conv fp32 11 add fp32 1 relu fp32 1 -7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp32 11 add fp32 1 relu fp32 1 -9 gpu conv fp32 11 add fp32 1 relu fp32 1 -10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp32 11 add fp32 1 relu fp32 1 -12 gpu conv fp32 11 add fp32 1 relu fp32 1 -13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp32 11 add fp32 1 relu fp32 1 -15 gpu mul fp32 11 add fp32 1 relu fp32 1 -16 gpu mul fp32 11 add fp32 1 -17 gpu softmax fp32 1 ------ -+++++ -conf2 2.0787477568568082 1.7725701909562666 72.76 0.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf3 2.2877881266029436 1.9268677640464096 72.04 0.7999999999999972 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add 
fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf4 2.493698381711785 2.0336802939709626 72.02 0.8200000000000074 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf5 2.164723960411776 1.8442442134020163 71.94 0.9000000000000057 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 
relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf6 2.53794461743687 2.069640641367895 71.67999999999999 1.1600000000000108 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf7 1.7943268128686711 1.6103705347377417 71.58 1.2600000000000051 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 
pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf8 1.8143284638396158 1.6288620764171362 71.5 1.3400000000000034 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv fp16 12 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf9 2.5462742331906263 2.076061630349781 71.48 1.3599999999999994 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 
gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf10 2.526515422129153 2.063839193109964 71.39999999999999 1.440000000000012 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf11 2.1596661517243856 1.8351710968407349 71.34 1.5 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 156 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf12 2.3444383477958337 1.981259839350623 71.22 1.6200000000000045 -1 gpu conv fp16 12 add fp16 1 relu 
fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf13 1.8402020049200172 1.652343405000522 71.2 1.6400000000000006 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -13 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf14 2.6420417968257306 2.167425635999969 71.12 1.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu 
fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf15 2.543198098440602 2.0805826545876145 71.1 1.740000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf16 2.6224991911009328 2.1476958232678807 70.89999999999999 1.940000000000012 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add 
fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf17 2.5978010917593752 2.131515210392801 70.8 2.0400000000000063 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf18 2.623210258119482 2.156636511928761 70.76 2.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 
add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf19 2.598187894495609 2.1322228990374104 70.76 2.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf20 2.640464221374653 2.1682626030871295 70.76 2.0799999999999983 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ 
-conf21 2.659563405662692 2.1881035849678936 70.54 2.299999999999997 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf22 2.636584103560761 2.1652496021557557 70.39999999999999 2.440000000000012 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 165 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf23 2.6315080449303547 2.161259580137757 70.38 2.460000000000008 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu 
conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf24 2.7367939789033153 2.263326406058847 70.34 2.5 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 160 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf25 2.712182817327382 2.2404693918737233 70.24000000000001 2.5999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 
1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf26 2.660510795888948 2.187299344706456 70.22 2.6200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -9 gpu conv fp16 12 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf27 2.457573203839654 2.0936930776435383 70.1 2.740000000000009 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -10 gpu conv samp_fp16 262 add fp16 1 
relu fp16 1 pool_max fp16 1 -11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ -+++++ -conf28 2.7452293174567757 2.2593302388139347 69.92 2.9200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 -6 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -7 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 -8 gpu conv fp16 12 add fp16 1 relu fp16 1 -9 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 -10 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -11 gpu conv fp16 12 add fp16 1 relu fp16 1 -12 gpu conv fp16 12 add fp16 1 relu fp16 1 -13 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 -14 gpu mul fp16 12 add fp16 1 relu fp16 1 -15 gpu mul fp16 12 add fp16 1 relu fp16 1 -16 gpu mul fp16 12 add fp16 1 -17 gpu softmax fp32 1 ------ diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/autotuner/img_pareto_curve.py b/hpvm/projects/hpvm-tensor-rt/autotuner/autotuner/img_pareto_curve.py deleted file mode 100644 index 5192eb7e580205ba9fcd368baa2e26358d4315d5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/autotuner/img_pareto_curve.py +++ /dev/null @@ -1,169 +0,0 @@ -import os -import shutil - - -AL_THRESHOLD = 0.1 - - -class Config: - def __init__(self): - self.avg_accuracy = 0 - self.avg_loss = 0 - self.speedup = 1 - self.fname = "" - self.flags = [] - - -def loadConfigsFromDir(result_dir): - def parseTopLine(x): - - toks = x.split() - - speedup = 1.0 - for tok in toks: - if "avg_accuracy" in tok: - avg_accuracy = 
float(tok.split("=")[1]) - if "speedup" in tok: - speedup = float(tok.split("=")[1]) - return avg_accuracy, speedup - - def skipFile(fname): - - skip_files = {} - skip_files["confidence_summary.txt"] = 1 - skip_files["promise_confs.txt"] = 1 - - if "accuracy" in fname: # *_accuracy files should be skipped - return True - - if "norms" in fname: # *_accuracy files should be skipped - return True - - if ".#" in fname: # *_accuracy files should be skipped - return True - - # if "_promise" in fname: # *_accuracy files should be skipped - # return True - - if not fname[-1].isdigit(): - return True - - if fname in skip_files: - return True - else: - return False - - config_arr = [] - file_names = os.listdir(result_dir) - - for fname in file_names: - if not skipFile(fname): - - fpath = result_dir + fname - config = Config() - f = open(fpath, "r") - - it = 0 - for x in f: - if x.strip == "": - continue - if it == 0: - avg_accuracy, speedup = parseTopLine(x) - config.avg_accuracy = avg_accuracy - config.avg_loss = -avg_accuracy - config.speedup = speedup - config.fname = fname - else: - flag = int(x.strip()) - config.flags.append(flag) - it += 1 - - config_arr.append(config) - - return config_arr - - -class Configuration: - def __init__(self, name, speedup, energy, accuracy, accuracy_loss): - self.name = name - self.speedup = speedup - self.energy = energy - self.accuracy = accuracy - self.accuracy_loss = accuracy_loss - - def __repr__(self): - return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss)) - - @classmethod - def from_config(cls, config): - return cls(config.fname, config.speedup, 0, config.avg_accuracy, config.avg_loss) - - @staticmethod - def energy_points(configurations): - return [ - (conf.energy, conf.accuracy) - for conf in configurations - ] - - @staticmethod - def speedup_points(configurations): - return [ - (conf.speedup, conf.accuracy) - for conf in configurations - ] - - -def is_pareto_efficient(configs, values, 
value_margins): - import numpy as np - from pprint import pprint - - np_values = np.array(values) - np_margins = np.array(value_margins) - is_efficient = np.ones(np_values.shape[0], dtype=bool) - for i, c in enumerate(np_values): - if is_efficient[i]: - # Keep any point with a higher value - is_efficient[is_efficient] = np.any( - np_values[is_efficient] + np_margins >= c, axis=1 - ) - is_efficient[i] = True # And keep self - return (np.array(configs)[is_efficient]).tolist() - - -def findParetoConfigs(base_dir, psnr_band_size): - result_dir = base_dir + "/pareto/" - try: - os.mkdir(result_dir) - except: - print "could not create dir" - - input_dir = base_dir + "/high_confidence/" - config_arr = loadConfigsFromDir(input_dir) - configurations = [Configuration.from_config( - config) for config in config_arr] - # energy_points = Configuration.energy_points(configurations) - speedup_points = Configuration.speedup_points(configurations) - - # No Pareto Selection if list is < 50 configurations - if len(configurations) < 50: - speedup_pareto = configurations # Include all in Pareto Frontier - else: - # energy_pareto = is_pareto_efficient(configurations, energy_points, ...) 
- speedup_pareto = is_pareto_efficient( - configurations, speedup_points, [-1e-2, psnr_band_size] - ) - print("len(configurations) = ", len(configurations)) - print("len(speedup_pareto) = ", len(speedup_pareto)) - - for conf in speedup_pareto: - #dst_path = conf.name.replace("full_results", "pareto") - src_path = input_dir + conf.name - dst_path = result_dir + conf.name - shutil.copy(src_path, dst_path) - - -if __name__ == "__main__": - from sys import argv - psnr_band_size = float(argv[2]) if len(argv) > 2 else 3.0 - print("*psnr_band_size = ", psnr_band_size) - findParetoConfigs(argv[1], psnr_band_size) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_layers.txt deleted file mode 100644 index 5741a41ba302af533e5f6e31be0611226dfbe7db..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_layers.txt +++ /dev/null @@ -1,6 +0,0 @@ -conv add tanh pool -conv add tanh pool -conv add tanh -conv add tanh -conv add tanh pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_tensors.txt deleted file mode 100644 index 2a925c986fc2b82718bfb0497f01ce48a99db223..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/alexnet_tensors.txt +++ /dev/null @@ -1,26 +0,0 @@ -#Conv1,4 -Conv -Add -Relu -Pool -#Conv2,4 -Conv -Add -Relu -Pool -#Conv3,3 -Conv -Add -Relu -#Conv4,3 -Conv -Add -Relu -#Conv5,4 -Conv -Add -Relu -Pool -#FC1,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/dev_knobs.txt deleted file mode 100644 index bba4dc88d5b3940413a099c13c903a01d0000c56..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/dev_knobs.txt +++ /dev/null @@ -1,6 
+0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs.txt deleted file mode 100644 index 050fc6118045090b4a5cc442105181f56d693a77..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs.txt +++ /dev/null @@ -1,6 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs1.txt deleted file mode 100644 index d2fc2c9493453f55cb83094373b19a24b59135d4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/knobs1.txt +++ /dev/null @@ -1,6 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/op_cost.txt deleted file mode 100644 index 04336fca2708d5e5d78849e1c12014f5ddbd1ad7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet/op_cost.txt +++ /dev/null @@ -1,6 +0,0 @@ -11894784.000000 -39321600.000000 -21233664.000000 -28311552.000000 -18874368.000000 -20480.000000 diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_layers.txt deleted file mode 100644 index 00059a38ce4d7a71d3c5f0b4888924e4fcce9e98..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_layers.txt +++ /dev/null @@ -1,7 +0,0 @@ -conv add tanh -conv add tanh pool -conv add tanh -conv add tanh pool -conv add tanh -conv add tanh pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_tensors.txt deleted file mode 100644 index 747c7221bae6e15ebb2d86c3ae6e577362602700..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/alexnet2_tensors.txt +++ /dev/null @@ -1,30 +0,0 @@ -#Conv1,3 -Conv -Add -Relu -#Conv2,4 -Conv -Add -Relu -Pool -#Conv3,3 -Conv -Add -Relu -#Conv4,4 -Conv -Add -Relu -Pool -#Conv5,3 -Conv -Add -Relu -#Conv6,4 -Conv -Add -Relu -Pool -#FC1,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/dev_knobs.txt deleted file mode 100644 index 0324aecdca3c4b13fb30f1afdabcf69d22df9027..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/dev_knobs.txt +++ /dev/null @@ -1,7 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs.txt deleted file mode 100644 index 
c873eeddcdeaa44fe0365bdb5e3292997d0074b6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs.txt +++ /dev/null @@ -1,7 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs1.txt deleted file mode 100644 index 063ba473d6a7fa57d7572c86dde9beac0932163d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/knobs1.txt +++ /dev/null @@ -1,7 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/op_cost.txt deleted file mode 100644 index 5a5722f202dde469dca94c71dd9c5fc1cd7aa32b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2/op_cost.txt +++ /dev/null @@ -1,7 +0,0 @@ -88473.601562 -943718.375000 -471859.187500 -943718.375000 -471859.187500 -943718.375000 -2048.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt deleted file mode 100644 index 01f40077d4f8342479d1965551af2d7e30a4c3f2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt +++ /dev/null @@ -1,13 +0,0 @@ -conv add tanh -conv add 
tanh pool -conv add tanh -conv add tanh pool -conv add tanh -conv add tanh pool -dense add -reduce -conv -conv -conv -reduce -reduce diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt deleted file mode 100644 index 80ff2706a43e33b81af6d47e96f702efdfcb21b3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt +++ /dev/null @@ -1,13 +0,0 @@ -468.076 -947.434 -255.422 -348.769 -256.658 -1.05427 -1.05427 -107.5062 -666.888 -432.622 -252.458 -11.51922 -2.01168 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt deleted file mode 100644 index b2bf962cd60722978b3205adca9c5822e59fc603..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt +++ /dev/null @@ -1,8 +0,0 @@ -conv add activation pool -conv add activation pool -conv add activation -conv add activation -conv add activation pool -dense add activation -dense add activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt deleted file mode 100644 index ec3b8b5f375673e659594dca7ad8fd8ef6ace435..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt +++ /dev/null @@ -1,8 +0,0 @@ -1457111.000000 -4478976.000000 -2242805.750000 -2990407.750000 -1993605.125000 -754974.750000 -335544.312500 -81920.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/quant_ranges2.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/quant_ranges2.txt deleted file mode 100644 index 
36c9c390b54168b9c872939f81dc2c6187e04761..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/quant_ranges2.txt +++ /dev/null @@ -1,8 +0,0 @@ -0.0 255.0 -0.5503702693581581 0.5811487324237921 -2.802485 1.648145 0.0 1572.3096923828125 -0.0 1572.3096923828125 -0.2867645202279091 0.26272463005783797 -0.47985682 0.501206 0.0 3183.7813264160477 -0.0 3183.7813264160477 -0.16606662392616273 0.15785247704386754 -0.42038992 0.5545839 0.0 1765.4451872558668 -0.0 1765.4451872558668 -0.10464580833911895 0.11035470351576919 -1.4275751 0.9042998 0.0 1345.5418548586083 -0.0 1345.5418548586083 -0.09240880391001702 0.10250756608694818 -0.45662758 2.4040315 0.0 1227.3563232421875 -0.0 1227.3563232421875 -0.030517672039568428 0.02963459612801672 -0.07124679 0.09377053 0.0 1034.5966391601676 -0.0 1034.5966391601676 -0.038392101023346184 0.039147199764847845 -0.050027702 0.1841282 0.0 839.0697069702154 -0.0 839.0697069702154 -0.05494491942599416 0.08549865524470925 -0.16314922 0.15416704 -608.3993963623047 1082.8444653320819 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/knobs.txt deleted file mode 100644 index 3bcc8d25cc464e2557fdedbb2b5f93b05999f5f9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/knobs.txt +++ /dev/null @@ -1,17 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,41,43,45 -11,41,43,45 -11 -11,41,43,45 -11,41,43,45 -11 -11 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,41,43,45 -11,41,43,45 -11 -11,41,43,45 -11,41,43,45 -11 -11,12 -11 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/op_cost.txt deleted file mode 100644 index 9575b4b7c6783c0bd49eb8aa945045a6a4614af1..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/op_cost.txt +++ /dev/null @@ -1,17 +0,0 @@ -4425.6 -112.809 -10.9522 -675.831 -113.067 -11.4471 -674.881 -686.27 -4334.16 -112.544 -11.1993 -674.836 -112.973 -11.7161 -675.575 -685.598 -686.733 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/tuner_conf_template.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/tuner_conf_template.txt deleted file mode 100644 index 4cd90c098b42e33b2af46593ac760775c5c92e85..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/blend/tuner_conf_template.txt +++ /dev/null @@ -1,20 +0,0 @@ -+++++ -conf1 1.5 90 1.0 2.0 -1 gpu conv fp32 1 -2 gpu reduce fp32 1 -3 gpu reduce fp32 1 -4 gpu map2 fp32 1 -5 gpu reduce fp32 1 -6 gpu reduce fp32 1 -7 gpu map2 fp32 1 -8 gpu map2 fp32 1 -9 gpu conv fp32 1 -10 gpu reduce fp32 1 -11 gpu reduce fp32 1 -12 gpu map2 fp32 1 -13 gpu reduce fp32 1 -14 gpu reduce fp32 1 -15 gpu map2 fp32 1 -16 gpu map2 fp32 1 -17 gpu map2 fp32 1 ------ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/knobs.txt deleted file mode 100644 index bd4820f4123eafa0fbcda1c4896e91e2f7dfda7f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/knobs.txt +++ /dev/null @@ -1,9 +0,0 @@ -11,41,43,45 -11 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 -11,41,43,45 -11,41,43,45 -11 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/op_cost.txt deleted file mode 100644 index 64e2c8bec1bed96c8d6a25d3358075adb0de48f0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/op_cost.txt +++ /dev/null @@ -1,9 
+0,0 @@ -87.1338 -10.9506 -703.070 -520.145 -335.735 -19.1877 -9.27351 -2.46408 -17.7423 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/tuner_conf_template.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/tuner_conf_template.txt deleted file mode 100644 index 17b30a46e0e4ae079f909e38d79c86876e802e25..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/canny/tuner_conf_template.txt +++ /dev/null @@ -1,12 +0,0 @@ -+++++ -conf1 1.5 90 1.0 2.0 -1 gpu reduce fp16 1 -2 gpu map1 fp16 1 -3 gpu conv fp16 1 -4 gpu conv fp16 1 -5 gpu conv fp16 1 -6 gpu map2 fp16 1 -7 gpu reduce fp16 1 -8 gpu reduce fp16 1 -9 gpu map2 fp16 1 ------ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/knobs.txt deleted file mode 100644 index 15a06d67cbfc94f0add1221c4e07cc3c4a0459c5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/knobs.txt +++ /dev/null @@ -1,4 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,42,44,46 -12,42,44,46 -12 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/op_cost.txt deleted file mode 100644 index 910bcf3579b7d66759a0198db07e4339ccc1aa08..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/op_cost.txt +++ /dev/null @@ -1,4 +0,0 @@ -1672.868 -198.9254 -9876.189 -28158.14 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/tuner_conf_template.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/tuner_conf_template.txt deleted file mode 100644 index 49cee56932256482dd15746fa369406eb7cba23b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/fft/tuner_conf_template.txt +++ /dev/null @@ -1,7 +0,0 @@ -+++++ 
-conf1 1.5 90 1.0 2.0 -1 gpu conv fp32 1 -2 gpu reduce fp32 1 -3 gpu reduce fp32 1 -4 gpu map2 fp32 1 ------ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt deleted file mode 100644 index ee2cd80cb6e33da5e97ffe2e842644d7a705cdff..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt +++ /dev/null @@ -1,69 +0,0 @@ -fp32,11 -1 1.0 tensorConvolution tensorConvApprox dev conv_fc_red -fp16,12 -1 1.5 tensorConvolution tensorConvApproxHalf2 install conv_fc_red -perf,121 1,2,0 2.0 tensorConvolution tensorConvApprox dev conv -perf,122 1,2,1 2.0 tensorConvolution tensorConvApprox dev conv -perf,123 2,1,0 2.0 tensorConvolution tensorConvApprox dev conv -perf,124 2,1,1 2.0 tensorConvolution tensorConvApprox dev conv -perf,125 1,3,0 1.5 tensorConvolution tensorConvApprox dev conv -perf,126 1,3,1 1.5 tensorConvolution tensorConvApprox dev conv -perf,127 1,3,2 1.5 tensorConvolution tensorConvApprox dev conv -perf,128 3,1,0 1.5 tensorConvolution tensorConvApprox dev conv -perf,129 3,1,1 1.5 tensorConvolution tensorConvApprox dev conv -perf,130 3,1,2 1.5 tensorConvolution tensorConvApprox dev conv -perf,131 1,4,0 1.33 tensorConvolution tensorConvApprox dev conv -perf,132 1,4,1 1.33 tensorConvolution tensorConvApprox dev conv -perf,133 1,4,2 1.33 tensorConvolution tensorConvApprox dev conv -perf,134 1,4,3 1.33 tensorConvolution tensorConvApprox dev conv -perf,135 4,1,0 1.33 tensorConvolution tensorConvApprox dev conv -perf,136 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv -perf,137 4,1,2 1.33 tensorConvolution tensorConvApprox dev conv -perf,138 4,1,3 1.33 tensorConvolution tensorConvApprox dev conv -perf_fp16,151 1,2,0 3.0 tensorConvolution tensorConvApprox install conv -perf_fp16,152 1,2,1 3.0 tensorConvolution tensorConvApprox install conv -perf_fp16,153 2,1,0 3.0 tensorConvolution tensorConvApprox 
install conv -perf_fp16,154 2,1,1 3.0 tensorConvolution tensorConvApprox install conv -perf_fp16,155 1,3,0 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,156 1,3,1 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,157 1,3,2 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,158 3,1,0 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,159 3,1,1 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,160 3,1,2 2.25 tensorConvolution tensorConvApprox install conv -perf_fp16,161 1,4,0 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,162 1,4,1 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,163 1,4,2 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,164 1,4,3 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,165 4,1,0 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,166 4,1,1 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,167 4,1,2 2.0 tensorConvolution tensorConvApprox install conv -perf_fp16,168 4,1,3 2.0 tensorConvolution tensorConvApprox install conv -samp,231 2,0,1 2.0 tensorConvolution tensorConvApprox dev conv -samp,232 2,1,1 2.0 tensorConvolution tensorConvApprox dev conv -samp,233 3,0,1 1.5 tensorConvolution tensorConvApprox dev conv -samp,234 3,1,1 1.5 tensorConvolution tensorConvApprox dev conv -samp,235 3,2,1 1.5 tensorConvolution tensorConvApprox dev conv -samp,236 4,0,1 1.33 tensorConvolution tensorConvApprox dev conv -samp,237 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv -samp,238 4,2,1 1.33 tensorConvolution tensorConvApprox dev conv -samp,239 4,3,1 1.33 tensorConvolution tensorConvApprox dev conv -samp_fp16,261 2,0,1 3.0 tensorConvolution tensorConvApprox install conv -samp_fp16,262 2,1,1 3.0 tensorConvolution tensorConvApprox install conv -samp_fp16,263 3,0,1 2.25 tensorConvolution tensorConvApprox install conv -samp_fp16,264 3,1,1 2.25 tensorConvolution tensorConvApprox install conv 
-samp_fp16,265 3,2,1 2.25 tensorConvolution tensorConvApprox install conv -samp_fp16,266 4,0,1 2.0 tensorConvolution tensorConvApprox install conv -samp_fp16,267 4,1,1 2.0 tensorConvolution tensorConvApprox install conv -samp_fp16,268 4,2,1 2.0 tensorConvolution tensorConvApprox install conv -samp_fp16,269 4,3,1 2.0 tensorConvolution tensorConvApprox install conv -red_samp,41 1 1.5 tensorReduction tensorReduction dev red -red_samp,42 1 2.25 tensorReduction tensorReduction dev red -red_samp,43 1 1.4 tensorReduction tensorReduction dev red -red_samp,44 1 2 tensorReduction tensorReduction dev red -red_samp,45 1 1.25 tensorReduction tensorReduction dev red -red_samp,46 1 1.8 tensorReduction tensorReduction dev red -swing_level,1 1 12 tensorConvolution tensorConvApprox install conv_fc -swing_level,2 1 10 tensorConvolution tensorConvApprox install conv_fc -swing_level,3 1 9 tensorConvolution tensorConvApprox install conv_fc -swing_level,4 1 8 tensorConvolution tensorConvApprox install conv_fc -swing_level,5 1 6 tensorConvolution tensorConvApprox install conv_fc -swing_level,6 1 5 tensorConvolution tensorConvApprox install conv_fc -swing_level,7 1 4 tensorConvolution tensorConvApprox install conv_fc diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_dnn.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_dnn.txt deleted file mode 100644 index 2180997527410cfdbf577a116fd39a592e2af05b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_dnn.txt +++ /dev/null @@ -1,25 +0,0 @@ -fp32,11 -1 1.0 tensorConvolution tensorConvolution -fp16,12 -1 1.5 tensorConvolution tensorHalfConvolution -perf,21 1,2,0 2.25 tensorConvolution tensorConvPerfCuda -perf,22 1,2,1 2.25 tensorConvolution tensorConvPerfCuda -perf,23 1,3,0 1.88 tensorConvolution tensorConvPerfCuda -perf,24 1,3,1 1.88 tensorConvolution tensorConvPerfCuda -perf,25 1,3,2 1.88 tensorConvolution tensorConvPerfCuda -perf,26 2,1,0 2.25 
tensorConvolution tensorConvPerfCuda -perf,27 2,1,1 2.25 tensorConvolution tensorConvPerfCuda -perf,28 3,1,0 1.88 tensorConvolution tensorConvPerfCuda -perf,29 3,1,1 1.88 tensorConvolution tensorConvPerfCuda -perf,30 3,1,2 1.88 tensorConvolution tensorConvPerfCuda -samp,31 2,0 2.25 tensorConvolution tensorConvSampSim -samp,32 2,1 2.25 tensorConvolution tensorConvSampSim -samp,33 4,0 1.8 tensorConvolution tensorConvSampSim -samp,34 4,1 1.8 tensorConvolution tensorConvSampSim -samp,35 4,2 1.8 tensorConvolution tensorConvSampSim -samp,36 4,3 1.8 tensorConvolution tensorConvSampSim -swing_level,1 1 12 -swing_level,2 1 10 -swing_level,3 1 9 -swing_level,4 1 8 -swing_level,5 1 6 -swing_level,6 1 5 -swing_level,7 1 4 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_old.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_old.txt deleted file mode 100644 index c632abbd478101158063879706d6baf93852c8ef..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs_old.txt +++ /dev/null @@ -1,31 +0,0 @@ -fp32,11 -1 1.0 tensorConvolution tensorConvolution -fp16,12 -1 1.5 tensorConvolution tensorHalfConvolution -perf,21 1,2,0 2.0 tensorConvolution tensorConvPerfCuda -perf,22 1,2,1 2.0 tensorConvolution tensorConvPerfCuda -perf,23 1,3,0 1.5 tensorConvolution tensorConvPerfCuda -perf,24 1,3,1 1.5 tensorConvolution tensorConvPerfCuda -perf,25 1,3,2 1.5 tensorConvolution tensorConvPerfCuda -perf,26 2,1,0 2.0 tensorConvolution tensorConvPerfCuda -perf,27 2,1,1 2.0 tensorConvolution tensorConvPerfCuda -perf,28 3,1,0 1.5 tensorConvolution tensorConvPerfCuda -perf,29 3,1,1 1.5 tensorConvolution tensorConvPerfCuda -perf,30 3,1,2 1.5 tensorConvolution tensorConvPerfCuda -samp,31 2,0 2.0 tensorConvolution tensorConvSampSim -samp,32 2,1 2.0 tensorConvolution tensorConvSampSim -samp,33 4,0 1.5 tensorConvolution tensorConvSampSim -samp,34 4,1 1.5 tensorConvolution tensorConvSampSim -samp,35 4,2 1.5 tensorConvolution 
tensorConvSampSim -samp,36 4,3 1.5 tensorConvolution tensorConvSampSim -reduction_samp,41 1 1.5 -reduction_samp,42 1 2.25 -reduction_samp,43 1 1.4 -reduction_samp,44 1 2 -reduction_samp,45 1 1.25 -reduction_samp,46 1 1.8 -swing_level,1 1 12 -swing_level,2 1 10 -swing_level,3 1 9 -swing_level,4 1 8 -swing_level,5 1 6 -swing_level,6 1 5 -swing_level,7 1 4 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/dev_knobs.txt deleted file mode 100644 index 94b9e6ebd34d115c62f075f2f12553284fdd981d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/dev_knobs.txt +++ /dev/null @@ -1,4 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs.txt deleted file mode 100644 index 8973c89f7a89f9c62c12f8371d16eebad7264b31..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs.txt +++ /dev/null @@ -1,4 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs1.txt deleted file mode 100644 index be1ce58c95981535ec94a7f8badffe967cfed586..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/knobs1.txt +++ /dev/null @@ -1,4 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_layers.txt deleted file mode 100644 index 5c28aa6dca176e1b0ef00fcad2fecf8024c76563..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_layers.txt +++ /dev/null @@ -1,4 +0,0 @@ -conv add pool tanh -conv add pool tanh -dense add tanh -dense add tanh diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_tensors.txt deleted file mode 100644 index f26403376fd23964ab11743d4c667860126f1581..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/lenet_tensors.txt +++ /dev/null @@ -1,18 +0,0 @@ -#Conv1,4 -Conv -Add -Relu -Pool -#Conv2,4 -Conv -Add -Relu -Pool -#FC1,3 -Mul -Add -Relu -#FC1,3 -Mul -Add -Relu diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/op_cost.txt deleted file mode 100644 index 74b1b668e2f27f3ddb77dcac7fff9890c70a6f02..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/lenet/op_cost.txt +++ /dev/null @@ -1,4 +0,0 @@ -62720.000000 -1003520.000000 -321126.406250 -1024.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/dev_knobs.txt deleted file mode 100644 index 7e8de16a0800979f707099559dc14cfd003140b3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/dev_knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs.txt deleted file mode 100644 index 900ad3944d5203d4552a75140358388c99bea181..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs1.txt deleted file mode 100644 index 6719acb97a58bd7f3d9fbe428f755e13df98b3d0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/knobs1.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 
-11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_layer_comp.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_layer_comp.txt deleted file mode 100644 index adcfbfed538bedeb1d947039943dcb2dfca5e548..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_layer_comp.txt +++ /dev/null @@ -1,83 +0,0 @@ -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -pool_mean -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_ops.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_ops.txt deleted file mode 100644 index 6481664b869927a6b40f14d46e2e56c07068456a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/mobilenet_ops.txt 
+++ /dev/null @@ -1,167 +0,0 @@ -#Conv1,1 -Conv1 -#NML1,1 -BatchNorm1 -#NML2,1 -Relu1 -#NML3,1 -Conv2 -#NML4,1 -BatchNorm2 -#NML5,1 -Relu2 -#Conv3,1 -Conv3 -#NML6,1 -BatchNorm3 -#NML7,1 -Relu3 -#NML8,1 -Conv4 -#NML9,1 -BatchNorm4 -#NML10,1 -Relu4 -#Conv5,1 -Conv5 -#NML11,1 -BatchNorm5 -#NML12,1 -Relu5 -#NML13,1 -Conv6 -#NML14,1 -BatchNorm6 -#NML15,1 -Relu6 -#Conv7,1 -Conv7 -#NML16,1 -BatchNorm7 -#NML17,1 -Relu7 -#NML18,1 -Conv8 -#NML19,1 -BatchNorm8 -#NML20,1 -Relu8 -#Conv9,1 -Conv9 -#NML21,1 -BatchNorm9 -#NML22,1 -Relu9 -#NML23,1 -Conv10 -#NML24,1 -BatchNorm10 -#NML25,1 -Relu10 -#Conv11,1 -Conv11 -#NML26,1 -BatchNorm11 -#NML27,1 -Relu11 -#NML28,1 -Conv12 -#NML29,1 -BatchNorm12 -#NML30,1 -Relu12 -#Conv13,1 -Conv13 -#NML31,1 -BatchNorm13 -#NML32,1 -Relu13 -#NML33,1 -Conv14 -#NML34,1 -BatchNorm14 -#NML35,1 -Relu14 -#Conv15,1 -Conv15 -#NML36,1 -BatchNorm15 -#NML37,1 -Relu15 -#NML38,1 -Conv16 -#NML39,1 -BatchNorm16 -#NML40,1 -Relu16 -#Conv17,1 -Conv17 -#NML41,1 -BatchNorm17 -#NML42,1 -Relu17 -#NML43,1 -Conv18 -#NML44,1 -BatchNorm18 -#NML45,1 -Relu18 -#Conv19,1 -Conv19 -#NML46,1 -BatchNorm19 -#NML47,1 -Relu19 -#NML48,1 -Conv20 -#NML49,1 -BatchNorm20 -#NML50,1 -Relu20 -#Conv21,1 -Conv21 -#NML51,1 -BatchNorm21 -#NML52,1 -Relu21 -#NML53,1 -Conv22 -#NML54,1 -BatchNorm22 -#NML55,1 -Relu22 -#Conv23,1 -Conv23 -#NML56,1 -BatchNorm23 -#NML57,1 -Relu23 -#NML58,1 -Conv24 -#NML59,1 -BatchNorm24 -#NML60,1 -Relu24 -#Conv25,1 -Conv25 -#NML61,1 -BatchNorm25 -#NML62,1 -Relu25 -#NML63,1 -Conv26 -#NML64,1 -BatchNorm26 -#NML65,1 -Relu26 -#Conv27,1 -Conv27 -#NML66,1 -BatchNorm27 -#NML67,1 -Relu27 -#NML68,1 -Pool1 -#FC1,2 -Mul1 -Add1 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/op_cost.txt deleted file mode 100644 index 673e704b7e37e19c090e98799189a4411bad9f7c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet/op_cost.txt +++ /dev/null @@ -1,28 +0,0 @@ 
-88473.601562 -29491.199219 -209715.203125 -14745.599609 -209715.203125 -29491.199219 -419430.406250 -7372.799805 -209715.203125 -14745.599609 -419430.406250 -3686.399902 -209715.203125 -7372.799805 -419430.406250 -7372.799805 -419430.406250 -7372.799805 -419430.406250 -7372.799805 -419430.406250 -7372.799805 -419430.406250 -1843.199951 -209715.203125 -3686.399902 -419430.406250 -1024.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/dev_knobs.txt deleted file mode 100644 index 9b93811d3b6aaca218ce83af9e03bcacf9fe62a0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/dev_knobs.txt +++ /dev/null @@ -1,8 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs.txt deleted file mode 100644 index c7273f3fc6e487ada58eaed7bc036f707c3ce541..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs.txt +++ /dev/null @@ -1,8 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs1.txt 
b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs1.txt deleted file mode 100644 index 719d96e48168a477d6edfee1a02b80b554612ec7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/knobs1.txt +++ /dev/null @@ -1,8 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt deleted file mode 100644 index 8ba22c7e6f0268cf4f802576e04bdbe55b9efd15..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt +++ /dev/null @@ -1,41 +0,0 @@ -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -pool_mean -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_ops.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_ops.txt deleted file mode 100644 index ee6dcf89d959234cc5fdd3267d705dcd76db8250..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/mobilenet_shallow_ops.txt +++ /dev/null @@ -1,47 +0,0 @@ -#Conv1,1 -Conv1 -#NML1,1 -BatchNorm1 -#NML2,1 -Relu1 
-#NML3,1 -Conv2 -#NML4,1 -BatchNorm2 -#NML5,1 -Relu2 -#Conv3,1 -Conv3 -#NML6,1 -BatchNorm3 -#NML7,1 -Relu3 -#NML8,1 -Conv4 -#NML9,1 -BatchNorm4 -#NML10,1 -Relu4 -#Conv5,1 -Conv5 -#NML11,1 -BatchNorm5 -#NML12,1 -Relu5 -#NML13,1 -Conv6 -#NML14,1 -BatchNorm6 -#NML15,1 -Relu6 -#Conv7,1 -Conv7 -#NML16,1 -BatchNorm7 -#NML17,1 -Relu7 -#NML18,1 -Pool1 -#FC1,2 -Mul1 -Add1 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/op_cost.txt deleted file mode 100644 index 7266441905a08c1ef1796dec8ee6c05660998378..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_shallow/op_cost.txt +++ /dev/null @@ -1,8 +0,0 @@ -265420.812500 -629145.625000 -629145.625000 -1258291.250000 -629145.625000 -1258291.250000 -629145.625000 -6144.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt deleted file mode 100644 index a93fac1daed00254fca84258bc92e7788390fd93..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt +++ /dev/null @@ -1,81 +0,0 @@ -conv -batchnorm -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv 
-batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt deleted file mode 100644 index 44d50dbe00baba66bd76bb7a0d2a9f37b8580fd4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt +++ /dev/null @@ -1,15 +0,0 @@ -44236.80078 -104857.6019 -104857.6019 -209715.2037 -104857.6019 -209715.2037 -104857.6019 -209715.2037 -209715.2037 -209715.2037 -209715.2037 -209715.2037 -104857.6019 -209715.2037 -256.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/dev_knobs.txt deleted file mode 100644 index 2a07f89372edf02499c9e4462290d2495da5bfee..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/dev_knobs.txt +++ /dev/null @@ -1,22 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs.txt deleted file mode 100644 index eadcb5ebff73feb75b9f7533f7703252ab895afc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs.txt +++ /dev/null @@ -1,22 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs1.txt deleted file mode 100644 index b7ff033cec2b85390ce6c7667fbbb04837a7eaf9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs1.txt +++ /dev/null @@ 
-1,22 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs2.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs2.txt deleted file mode 100644 index ec3e26a51f1bbfb29436aa532b493c22557e31d7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/knobs2.txt +++ /dev/null @@ -1,22 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/op_cost.txt deleted file mode 100644 index fdba070cfc5eac559c8384306993fb52a1eb2e04..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/op_cost.txt +++ /dev/null @@ -1,22 +0,0 @@ -44236.800781 -235929.593750 -235929.593750 -235929.593750 -235929.593750 -235929.593750 -235929.593750 -117964.796875 -235929.593750 -13107.200195 -235929.593750 -235929.593750 -235929.593750 -235929.593750 -117964.796875 -235929.593750 -13107.200195 -235929.593750 -235929.593750 -235929.593750 -235929.593750 -64.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_layers.txt deleted file mode 100644 index 43f00249253e6f4375153ff2309c470f4923a1d0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_layers.txt +++ /dev/null @@ -1,41 +0,0 @@ -conv add activation -conv add activation -conv add -add -activation -conv add activation -conv add -add -activation -conv add activation -conv add -add -activation -conv add activation -conv add -conv add -add -activation -conv add activation -conv add -add -activation -conv add activation -conv add -add -activation -conv add activation -conv add -conv add -add -activation -conv add activation -conv add -add -activation -conv add activation -conv add -add -activation -pool_mean -dense add diff 
--git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_tensors.txt deleted file mode 100644 index ee0e8456daf66eb07bf3200d3f4ab076534f6634..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet/resnet_tensors.txt +++ /dev/null @@ -1,114 +0,0 @@ -#Conv1,3 -Conv -Add -Relu -#Conv2,3 -Conv -Add -Relu -#Conv3,2 -Conv -Add -#NML1,1 -Add -#NML2,1 -Relu -#Conv4,3 -Conv -Add -Relu -#Conv5,2 -Conv -Add -#NML3,1 -Add -#NML4,1 -Relu -#Conv6,3 -Conv -Add -Relu -#Conv7,2 -Conv -Add -#NML5,1 -Add -#NML6,1 -Relu -#Conv8,3 -Conv -Add -Relu -#Conv9,2 -Conv -Add -#Conv10,2 -Conv -Add -#NML7,1 -Add -#NML8,1 -Relu -#Conv11,3 -Conv -Add -Relu -#Conv12,2 -Conv -Add -#NML9,1 -Add -#NML10,1 -Relu -#Conv13,3 -Conv -Add -Relu -#Conv14,2 -Conv -Add -#NML11,1 -Add -#NML12,1 -Relu -#Conv15,3 -Conv -Add -Relu -#Conv16,2 -Conv -Add -#Conv17,2 -Conv -Add -#NML13,1 -Add -#NML14,1 -Relu -#Conv18,3 -Conv -Add -Relu -#Conv19,2 -Conv -Add -#NML15,1 -Add -#NML16,1 -Relu -#Conv20,3 -Conv -Add -Relu -#Conv21,2 -Conv -Add -#NML17,1 -Add -#NML18,1 -Relu -#NML19,1 -Pool -#FC1,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt deleted file mode 100644 index 6fb1aef66aaa4a02c5eb6f9282753a43c629f203..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt +++ /dev/null @@ -1,21 +0,0 @@ -88473.60156 -1887436.833 -1887436.833 -1887436.833 -1887436.833 -3774873.667 -1887436.833 -26214.40046 -1887436.833 -1887436.833 -3774873.667 -1887436.833 -13107.20023 -1887436.833 -1887436.833 -3774873.667 -1887436.833 -6553.600116 -1887436.833 -1887436.833 -64.0000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt 
b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt deleted file mode 100644 index 2e51c67842656762091f2465b2824235a9959723..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt +++ /dev/null @@ -1,59 +0,0 @@ -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -activation -conv -batchnorm -conv -batchnorm -conv -batchnorm -activation -conv -batchnorm -activation -activation -pool_mean -dense add \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/dev_knobs.txt deleted file mode 100644 index 44fabd399e9d114f8bbbf4b64822e85a334fc162..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/dev_knobs.txt +++ /dev/null @@ -1,54 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/op_cost.txt deleted file mode 100644 index 51a116031c59a0e62d90861e31dda222a901156b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/op_cost.txt +++ /dev/null @@ -1,54 +0,0 @@ -1180139.500000 -123904.000000 -1115136.000000 -495616.000000 -495616.000000 -495616.000000 -1115136.000000 -495616.000000 -495616.000000 -1115136.000000 -495616.000000 -238878.718750 -1156055.000000 -513802.250000 -955514.875000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -256901.125000 -1156055.000000 -513802.250000 -1027604.500000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -256901.125000 -1156055.000000 -513802.250000 -1027604.500000 -513802.250000 -1156055.000000 -513802.250000 -513802.250000 -1156055.000000 -513802.250000 -20480.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/quant_ranges2.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/quant_ranges2.txt deleted file mode 100644 index efd6050d42c41cc53bca6b8a1e37ecd476dc2c10..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/quant_ranges2.txt +++ /dev/null @@ -1,54 +0,0 @@ --123.68 151.061 -0.5747970953583718 0.56459772187472 -0.0058212606 0.015389577 0.0 934.6561442260769 --5.810299396514893 12.891858446121272 -0.544614662528038 0.24515408918262138 -0.00013975719 0.00010588932 -10.319062399864197 8.678670206069992 -0.0 6.145776463985475 
-0.18031905634701254 0.21207299317419936 -0.00017183031 0.000101588186 -7.6816201620101925 9.201453342437766 -0.0 10.43091783905031 -0.22560040648281576 0.22525191849470233 -0.00024638147 0.00014042071 -3.83010697555542 3.860411907196095 --5.810299396514893 12.891858446121272 -0.43263624683022495 0.2949749290347107 -0.000119998615 9.139888e-05 -8.061470986366272 8.141587300300685 -0.0 6.200384625434893 -0.1698226538300514 0.14235900431871545 -8.860863e-05 8.355517e-05 -5.481491597175599 4.2078081378936965 -0.0 4.47170594310761 -0.15525014728307723 0.15632940307259882 -8.171968e-05 4.0018913e-05 -3.6843703370094296 3.4650170893669143 -0.0 4.6161603989601145 -0.15933965119719506 0.17397216582299158 -0.00012988421 0.00010135813 -1.5977122792005538 1.4737758718729186 -0.0 6.8487525005340615 -0.14451123128831386 0.14304637806118328 -4.2361393e-05 6.6440014e-05 -4.931722650527954 4.449415233612065 -0.0 4.033760806083722 -0.12324695030599832 0.12555985279381346 -5.0523813e-05 4.329417e-05 -3.2670734379291533 3.6830093951225296 -0.0 6.667089067935957 -0.1683492512255907 0.16272910618782485 -6.668582e-05 3.8562874e-05 -2.2874248178005216 1.6114515174627524 -0.0 7.2551298189163305 -0.14964802296459673 0.1396080917269055 -1.9781537e-05 2.6307946e-05 -5.558738519668579 4.226346692085322 -0.0 4.103045602321853 -0.09806137733161449 0.11390491172671352 -4.056617e-05 2.3085524e-05 -5.205046501636505 5.010882857322848 -0.0 3.8779746284484986 -0.1485816574841738 0.1498906888067731 -4.8492308e-05 6.3164814e-05 -1.5636358127593994 1.4471412793398377 -0.0 7.2551298189163305 -0.16395657777786254 0.16875704464316676 -2.4847817e-05 2.8032688e-05 -4.694492742061615 4.153118049144783 -0.0 7.047195756912359 -0.09732106301933527 0.10529463365674105 -1.8920293e-05 1.3819959e-05 -5.122300206184387 3.822466685295119 -0.0 3.601598421573673 -0.08627650760114193 0.08969089165330116 -2.4561228e-05 3.960287e-05 -2.9372031297683714 2.453564183473647 -0.0 3.8704470412732075 -0.12190281387418508 
0.1344742950797146 -6.2474406e-05 4.5489418e-05 -1.1263611575365067 0.9419634058475754 -0.0 7.360276422501315 -0.11249553721398114 0.1438516600430019 -1.775357e-05 1.5396989e-05 -6.671636656284333 7.135764349460775 -0.0 3.7979030499458766 -0.10526792272925377 0.1349908015877019 -3.4459194e-05 3.4367415e-05 -4.684974233627319 5.264288017273344 -0.0 4.181413197040673 -0.1404084537923336 0.12109696540981926 -6.0512917e-05 5.7992842e-05 -1.707221627831459 0.9086800929904393 -0.0 7.209602268219356 -0.16052422896027566 0.19026783116161974 -2.5455045e-05 2.27223e-05 -4.75133854341507 4.969562854290029 -0.0 5.027068079471608 -0.11333685562014578 0.10740291688591388 -3.8223963e-05 3.5434005e-05 -4.555457803249359 3.7164909157753137 -0.0 5.087393596649491 -0.11392991304397583 0.14321510225534495 -3.1888136e-05 3.15042e-05 -1.5135304692983627 1.3553025794029487 -0.0 8.094761092185998 -0.10484503862261772 0.1209987572357194 -1.4100494e-05 1.0806584e-05 -4.084207750797272 3.8462553339005128 -0.0 4.2752423663139805 -0.07607016738504171 0.07963053999841248 -1.6701442e-05 1.5241814e-05 -6.879458940029145 4.920539072513741 -0.0 3.51579514932658 -0.11533396770060061 0.10761716394126962 -3.1804513e-05 2.6629998e-05 -1.3364267847537994 1.0267766494750994 -0.0 8.094761092185998 -0.09670568963885307 0.10261806760728373 -1.7871134e-05 1.2980788e-05 -4.165599856376648 3.9349664554596693 -0.0 7.114320134163032 -0.06487256648391485 0.08072723559290183 -1.0383376e-05 7.2508515e-06 -7.300614297866821 4.876313820362272 -0.0 3.3421912226677954 -0.06480042573064566 0.07086511892825366 -1.8462242e-05 2.0371868e-05 -3.3948297231197357 2.547599082708394 -0.0 3.0945872743134055 -0.0918739195242524 0.0884740859493657 -3.932561e-05 3.442098e-05 -1.020233019709587 0.5999923423528848 -0.0 6.7569902172089265 -0.07119932436943054 0.09078719960898206 -1.09133825e-05 1.1075285e-05 -4.42014742231369 3.863946924209598 -0.0 3.1060702869893984 -0.05620019323378801 0.0647352593019614 -1.7693064e-05 2.5484389e-05 
-2.1564681169986724 1.7775597308874955 -0.0 3.5050717415809824 -0.07137715590745211 0.08277502278238583 -3.7179158e-05 3.6062916e-05 -1.2057263028621674 0.7878368514776246 -0.0 6.819144511223078 -0.06814771068841219 0.09314906919747618 -1.3129389e-05 1.2350107e-05 -4.264177570819855 3.6667083778381695 -0.0 3.010977530002802 -0.05523281182348728 0.06323543420434069 -2.8231338e-05 2.693129e-05 -2.403562089204788 2.0931886761188707 -0.0 3.2924690744877205 -0.07265310526639222 0.0840946944877511 -3.2401527e-05 3.4846238e-05 -1.3499533622264863 0.5710797674655921 -0.0 6.86581426668181 -0.06556392236053944 0.10085416895151492 -1.34982765e-05 1.5712916e-05 -4.0751124567985535 3.8311849322319937 -0.0 3.305581816196461 -0.05189245833083987 0.06286057442426854 -2.7664553e-05 2.7124273e-05 -2.2815526587963104 1.6054227759838495 -0.0 4.0827409029009045 -0.0667651962414384 0.0779341282621039 -2.5976125e-05 3.067452e-05 -1.531399680972099 0.9400330729484727 -0.0 7.124539953709001 -0.07115359698235989 0.10833600817620766 -1.1960556e-05 1.597267e-05 -3.499965760946274 6.504482283592731 -0.0 5.178686481952853 -0.05611966584250331 0.06787234821170791 -2.637223e-05 2.5699503e-05 -4.342721074581146 3.5257833659650792 -0.0 7.024678532601377 -0.06805450225621462 0.08350853440165543 -2.051169e-05 2.4474735e-05 -3.132657457351684 1.5121956378223056 -0.0 7.723012191295651 -0.06772804392129184 0.0790314114466323 -1.0622113e-05 1.1603386e-05 -4.100774613380432 3.367799842357637 -0.0 3.30561476135256 -0.04215342074632644 0.04599864583462508 -2.3764493e-05 2.0471329e-05 -3.442601467609405 1.980771198987965 -0.0 2.7743033297061963 -0.05116425715386867 0.07435814850032654 -3.0402798e-05 3.17601e-05 -0.6644622380137444 0.6883783437013742 -0.0 7.723012191295651 -0.04721667753905058 0.06949943191558243 -1.7287093e-05 1.273127e-05 -2.0042565877437593 2.5162933135032794 -0.0 13.089089687347467 -0.04412535773590207 0.06208482719958153 -5.398635e-06 4.8724514e-06 -10.966248041152955 6.323037844181082 
-0.0 2.665933228015901 -0.036525007653981444 0.04293334979563958 -2.6018542e-05 3.12493e-05 -2.3321639671325682 1.520020196676264 -0.0 2.3228017909526826 -0.0457546092569828 0.07228027954698127 -2.8726334e-05 4.8297712e-05 -0.5039844071269035 0.5125965433716879 -0.0 14.212855710030006 -0.05184821262955666 0.07706690635532187 -7.475739e-06 6.432327e-06 -8.724605677604675 7.675269114017488 -0.0 2.9809948239326776 -0.032554453313350676 0.03890361279249199 -2.651515e-05 3.1177675e-05 -1.8098392320871355 0.9923119935989424 -0.0 3.0153360161781455 -0.04308305019512773 0.07738747298717986 -2.5071722e-05 4.40581e-05 -0.5571711810231208 0.6459895361661969 -0.0 6.169458543777802 -0.0922410073429346 0.2492780588418327 -0.024867358 0.028531343 -8.56706305217743 11.648686022758701 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_layers.txt deleted file mode 100644 index f0b6ebedc9beccfc639b70433e30b172e2d44fea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_layers.txt +++ /dev/null @@ -1,172 +0,0 @@ -conv add activation pool -batchnorm -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation 
-conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -add -activation -pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_tensors.txt deleted file mode 100644 index 70ec0ff11101dd554842d0a721ca4aa772a3bab2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/resnet50_imagenet/resnet50_tensors.txt +++ /dev/null @@ -1,172 +0,0 @@ -Conv1,100,3,224,224,64,3,7,7 -#tensorBatchNorm1 -Conv2,100,64,55,55,64,64,1,1 -#tensorBatchNorm2 -#tensorRelu1 -Conv3,100,64,55,55,64,64,3,3 -#tensorBatchNorm3 -#tensorRelu2 -Conv4,100,64,55,55,256,64,1,1 -#tensorBatchNorm4 -Conv5,100,64,55,55,256,64,1,1 -#tensorBatchNorm5 -#tensorAdd1 -#tensorRelu3 -Conv6,100,256,55,55,64,256,1,1 -#tensorBatchNorm6 -#tensorRelu4 -Conv7,100,64,55,55,64,64,3,3 -#tensorBatchNorm7 -#tensorRelu5 -Conv8,100,64,55,55,256,64,1,1 -#tensorBatchNorm8 -#tensorAdd2 -#tensorRelu6 -Conv9,100,256,55,55,64,256,1,1 -#tensorBatchNorm9 -#tensorRelu7 
-Conv10,100,64,55,55,64,64,3,3 -#tensorBatchNorm10 -#tensorRelu8 -Conv11,100,64,55,55,256,64,1,1 -#tensorBatchNorm11 -#tensorAdd3 -#tensorRelu9 -Conv12,100,256,55,55,128,256,1,1 -#tensorBatchNorm12 -#tensorRelu10 -Conv13,100,128,28,28,128,128,3,3 -#tensorBatchNorm13 -#tensorRelu11 -Conv14,100,128,28,28,512,128,1,1 -#tensorBatchNorm14 -Conv15,100,256,55,55,512,256,1,1 -#tensorBatchNorm15 -#tensorAdd4 -#tensorRelu12 -Conv16,100,512,28,28,128,512,1,1 -#tensorBatchNorm16 -#tensorRelu13 -Conv17,100,128,28,28,128,128,3,3 -#tensorBatchNorm17 -#tensorRelu14 -Conv18,100,128,28,28,512,128,1,1 -#tensorBatchNorm18 -#tensorAdd5 -#tensorRelu15 -Conv19,100,512,28,28,128,512,1,1 -#tensorBatchNorm19 -#tensorRelu16 -Conv20,100,128,28,28,128,128,3,3 -#tensorBatchNorm20 -#tensorRelu17 -Conv21,100,128,28,28,512,128,1,1 -#tensorBatchNorm21 -#tensorAdd6 -#tensorRelu18 -Conv22,100,512,28,28,128,512,1,1 -#tensorBatchNorm22 -#tensorRelu19 -Conv23,100,128,28,28,128,128,3,3 -#tensorBatchNorm23 -#tensorRelu20 -Conv24,100,128,28,28,512,128,1,1 -#tensorBatchNorm24 -#tensorAdd7 -#tensorRelu21 -Conv25,100,512,28,28,256,512,1,1 -#tensorBatchNorm25 -#tensorRelu22 -Conv26,100,256,14,14,256,256,3,3 -#tensorBatchNorm26 -#tensorRelu23 -Conv27,100,256,14,14,1024,256,1,1 -#tensorBatchNorm27 -Conv28,100,512,28,28,1024,512,1,1 -#tensorBatchNorm28 -#tensorAdd8 -#tensorRelu24 -Conv29,100,1024,14,14,256,1024,1,1 -#tensorBatchNorm29 -#tensorRelu25 -Conv30,100,256,14,14,256,256,3,3 -#tensorBatchNorm30 -#tensorRelu26 -Conv31,100,256,14,14,1024,256,1,1 -#tensorBatchNorm31 -#tensorAdd9 -#tensorRelu27 -Conv32,100,1024,14,14,256,1024,1,1 -#tensorBatchNorm32 -#tensorRelu28 -Conv33,100,256,14,14,256,256,3,3 -#tensorBatchNorm33 -#tensorRelu29 -Conv34,100,256,14,14,1024,256,1,1 -#tensorBatchNorm34 -#tensorAdd10 -#tensorRelu30 -Conv35,100,1024,14,14,256,1024,1,1 -#tensorBatchNorm35 -#tensorRelu31 -Conv36,100,256,14,14,256,256,3,3 -#tensorBatchNorm36 -#tensorRelu32 -Conv37,100,256,14,14,1024,256,1,1 -#tensorBatchNorm37 
-#tensorAdd11 -#tensorRelu33 -Conv38,100,1024,14,14,256,1024,1,1 -#tensorBatchNorm38 -#tensorRelu34 -Conv39,100,256,14,14,256,256,3,3 -#tensorBatchNorm39 -#tensorRelu35 -Conv40,100,256,14,14,1024,256,1,1 -#tensorBatchNorm40 -#tensorAdd12 -#tensorRelu36 -Conv41,100,1024,14,14,256,1024,1,1 -#tensorBatchNorm41 -#tensorRelu37 -Conv42,100,256,14,14,256,256,3,3 -#tensorBatchNorm42 -#tensorRelu38 -Conv43,100,256,14,14,1024,256,1,1 -#tensorBatchNorm43 -#tensorAdd13 -#tensorRelu39 -Conv44,100,1024,14,14,512,1024,1,1 -#tensorBatchNorm44 -#tensorRelu40 -Conv45,100,512,7,7,512,512,3,3 -#tensorBatchNorm45 -#tensorRelu41 -Conv46,100,512,7,7,2048,512,1,1 -#tensorBatchNorm46 -Conv47,100,1024,14,14,2048,1024,1,1 -#tensorBatchNorm47 -#tensorAdd14 -#tensorRelu42 -Conv48,100,2048,7,7,512,2048,1,1 -#tensorBatchNorm48 -#tensorRelu43 -Conv49,100,512,7,7,512,512,3,3 -#tensorBatchNorm49 -#tensorRelu44 -Conv50,100,512,7,7,2048,512,1,1 -#tensorBatchNorm50 -#tensorAdd15 -#tensorRelu45 -Conv51,100,2048,7,7,512,2048,1,1 -#tensorBatchNorm51 -#tensorRelu46 -Conv52,100,512,7,7,512,512,3,3 -#tensorBatchNorm52 -#tensorRelu47 -Conv53,100,512,7,7,2048,512,1,1 -#tensorBatchNorm53 -#tensorAdd16 -#tensorRelu48 -#tensorPooling1 -FC1,100,2048,2048,1000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/dev_knobs.txt deleted file mode 100644 index 8970b19bfb5f7a6fb093f1b36215c23742e6f599..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/dev_knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs.txt deleted file mode 100644 index d238fa1036729f79cc66bdaa14667dcf16c60a9a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs1.txt deleted file mode 100644 index fb54e7f077eaf27d7182e273fae31a867d8cbb9f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/knobs1.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 
-11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/op_cost.txt deleted file mode 100644 index 5f58ebcc043915d28cf874a1f67e5b2637db1dfc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/op_cost.txt +++ /dev/null @@ -1,15 +0,0 @@ -88473.601562 -1887436.750000 -943718.375000 -1887436.750000 -943718.375000 -1887436.750000 -1887436.750000 -943718.375000 -1887436.750000 -1887436.750000 -471859.187500 -471859.187500 -471859.187500 -13107.200195 -256.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_layers.txt deleted file mode 100644 index 79818d6f010035c6e19f12881749f4d5b3d3c253..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_layers.txt +++ /dev/null @@ -1,15 +0,0 @@ -conv add activation -conv add activation pool -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -dense add activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_tensors.txt deleted file mode 100644 index a524e1e74e189c175b9f0e371ba04f4a6f452a1c..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10/vgg16_tensors.txt +++ /dev/null @@ -1,64 +0,0 @@ -#Conv1,3 -Conv -Add -Relu -#Conv2,4 -Conv -Add -Relu -Pool -#Conv3,3 -Conv -Add -Relu -#Conv4,4 -Conv -Add -Relu -Pool -#Conv5,3 -Conv -Add -Relu -#Conv6,3 -Conv -Add -Relu -#Conv7,4 -Conv -Add -Relu -Pool -#Conv8,3 -Conv -Add -Relu -#Conv9,3 -Conv -Add -Relu -#Conv10,4 -Conv -Add -Relu -Pool -#Conv11,3 -Conv -Add -Relu -#Conv12,3 -Conv -Add -Relu -#Conv13,4 -Conv -Add -Relu -Pool -#FC1,3 -Mul -Add -Relu -#FC2,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/dev_knobs.txt deleted file mode 100644 index 8970b19bfb5f7a6fb093f1b36215c23742e6f599..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/dev_knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs.txt deleted file mode 100644 index d238fa1036729f79cc66bdaa14667dcf16c60a9a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ 
-12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs1.txt deleted file mode 100644 index fb54e7f077eaf27d7182e273fae31a867d8cbb9f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/knobs1.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/op_cost.txt deleted file mode 100644 index 8c6daad2e2902e3ac821d99ebbe12e21b6428cc7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/op_cost.txt +++ /dev/null @@ 
-1,15 +0,0 @@ -884736.000000 -18874368.000000 -9437184.000000 -18874368.000000 -9437184.000000 -18874368.000000 -18874368.000000 -9437184.000000 -18874368.000000 -18874368.000000 -4718592.000000 -4718592.000000 -4718592.000000 -131072.000000 -25600.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_layers.txt deleted file mode 100644 index 79818d6f010035c6e19f12881749f4d5b3d3c253..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_layers.txt +++ /dev/null @@ -1,15 +0,0 @@ -conv add activation -conv add activation pool -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -dense add activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_tensors.txt deleted file mode 100644 index a524e1e74e189c175b9f0e371ba04f4a6f452a1c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar100/vgg16_tensors.txt +++ /dev/null @@ -1,64 +0,0 @@ -#Conv1,3 -Conv -Add -Relu -#Conv2,4 -Conv -Add -Relu -Pool -#Conv3,3 -Conv -Add -Relu -#Conv4,4 -Conv -Add -Relu -Pool -#Conv5,3 -Conv -Add -Relu -#Conv6,3 -Conv -Add -Relu -#Conv7,4 -Conv -Add -Relu -Pool -#Conv8,3 -Conv -Add -Relu -#Conv9,3 -Conv -Add -Relu -#Conv10,4 -Conv -Add -Relu -Pool -#Conv11,3 -Conv -Add -Relu -#Conv12,3 -Conv -Add -Relu -#Conv13,4 -Conv -Add -Relu -Pool -#FC1,3 -Mul -Add -Relu -#FC2,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt deleted file mode 100644 
index ef3d0ebcf7c50b8a67a7c42cc71d4b69fe21fde2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt +++ /dev/null @@ -1,46 +0,0 @@ -conv add -batchnorm -activation -conv add -batchnorm -activation -pool -conv add -batchnorm -activation -conv add -batchnorm -activation -pool -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -pool -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -pool -conv add -batchnorm -activation -conv add -batchnorm -activation -conv add -batchnorm -activation -pool -pool_mean -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt deleted file mode 100644 index 10dc83f865f3cc4ec02e86d4ae9f689eaa143610..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt +++ /dev/null @@ -1,15 +0,0 @@ -88473.60156 -1887436.833 -943718.4167 -1887436.833 -943718.4167 -1887436.833 -1887436.833 -943718.4167 -1887436.833 -1887436.833 -471859.2083 -471859.2083 -471859.2083 -13107.200195 -256.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/dev_knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/dev_knobs.txt deleted file mode 100644 index 793b41f5b8f316daf96604e25be70b21fc115046..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/dev_knobs.txt +++ /dev/null @@ -1,16 +0,0 @@ -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 
-11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -11 -11 -11 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs.txt deleted file mode 100644 index d238fa1036729f79cc66bdaa14667dcf16c60a9a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs.txt +++ /dev/null @@ -1,15 +0,0 @@ -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36 -12 -12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs1.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs1.txt deleted file mode 100644 index fb54e7f077eaf27d7182e273fae31a867d8cbb9f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/knobs1.txt +++ /dev/null @@ -1,15 +0,0 @@ -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 
-11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12,21,22,23,24,25,26,27,28,31,32,33,34 -11,12 -11,12 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/op_cost.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/op_cost.txt deleted file mode 100644 index 77754f5f8b03634faba7e933eea8d9c05f6e58ee..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/op_cost.txt +++ /dev/null @@ -1,16 +0,0 @@ -88473.601562 -1887436.750000 -943718.375000 -1887436.750000 -943718.375000 -1887436.750000 -1887436.750000 -943718.375000 -1887436.750000 -1887436.750000 -471859.187500 -471859.187500 -471859.187500 -13107.200195 -13107.200195 -256.000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_layers.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_layers.txt deleted file mode 100644 index bfa2a2700a164cf135541c560cdf1499f584b7a1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_layers.txt +++ /dev/null @@ -1,16 +0,0 @@ -conv add activation -conv add activation pool -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -dense add activation -dense add activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_tensors.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_tensors.txt deleted file mode 100644 index 
b9afe8dd34f5b26d71f215eb5e09ea08ed4c76b6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/vgg16_tensors.txt +++ /dev/null @@ -1,68 +0,0 @@ -#Conv1,3 -Conv -Add -Relu -#Conv2,4 -Conv -Add -Relu -Pool -#Conv3,3 -Conv -Add -Relu -#Conv4,4 -Conv -Add -Relu -Pool -#Conv5,3 -Conv -Add -Relu -#Conv6,3 -Conv -Add -Relu -#Conv7,4 -Conv -Add -Relu -Pool -#Conv8,3 -Conv -Add -Relu -#Conv9,3 -Conv -Add -Relu -#Conv10,4 -Conv -Add -Relu -Pool -#Conv11,3 -Conv -Add -Relu -#Conv12,3 -Conv -Add -Relu -#Conv13,4 -Conv -Add -Relu -Pool -#FC1,3 -Mul -Add -Relu -#FC2,3 -Mul -Add -Relu -#FC3,2 -Mul -Add diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/.gitignore deleted file mode 100644 index 9eb809777b0bcfdb2a7d91f9e671282ca03610a7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/.gitignore +++ /dev/null @@ -1,51 +0,0 @@ -*.py[cod] - -# C extensions -*.so - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox -nosetests.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -#vim -*.swp - -#virtualenv -venv -.ropeproject -opentuner.log -.*.swo -opentuner.db -.idea - -# SMB ROM (for SMB demo) -smb.nes - -MANIFEST diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/AUTHORS.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/AUTHORS.txt deleted file mode 100644 index 620e549e236ad322446694d11dc68a2f39a3ee31..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/AUTHORS.txt +++ /dev/null @@ -1,8 +0,0 @@ -Jason Ansel -Sam Fingeret -Shoaib Kamil -Deepak Narayanan -Jonathan Ragan-Kelley -Kalyan Veeramachaneni -Kevin Wu -Minshu Zhan diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/CHANGES.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/CHANGES.txt deleted file mode 100644 index a0af44222226f64cceb85bb633072a25abb40777..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/CHANGES.txt +++ /dev/null @@ -1,3 +0,0 @@ -v0.5.0, 2015-02-10 -- Refactoring and bugfixes. -v0.4.0, 2014-10-26 -- Add api and bugfixes. -v0.3.0, 2014-08-11 -- Initial release. diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/LICENSE.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/LICENSE.txt deleted file mode 100644 index 2b602e192b4a2302cae3288e3bd34746ff8475df..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/LICENSE.txt +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 Jason Ansel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
- diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/MANIFEST.in b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/MANIFEST.in deleted file mode 100644 index 376b77ae8f44a06787d5910191c713c986791a72..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include *.txt *.md diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/README.md b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/README.md deleted file mode 100644 index 729f35553a0fe22177a38f0545d03a4497bef03c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/README.md +++ /dev/null @@ -1,116 +0,0 @@ -OpenTuner -========= - -Program autotuning has been demonstrated in many domains to achieve better -or more portable performance. However, autotuners themselves are often not -very portable between projects because using a domain informed search space -representation is critical to achieving good results and because no single -search technique performs best for all problems. - -OpenTuner is a new framework for building domain-specific multi-objective -program autotuners. OpenTuner supports fully customizable configuration -representations, an extensible technique representation to allow for -domain-specific techniques, and an easy to use interface for communicating -with the tuned program. A key capability inside OpenTuner is the use of -ensembles of disparate search techniques simultaneously, techniques which -perform well will receive larger testing budgets and techniques which perform -poorly will be disabled. - -System dependencies -------------------- - -A list of system dependencies can be found in [debian-packages-deps][] -which are primarily python 2.6+ (not 3.x) and sqlite3 (or your -[supported][sqlalchemy-dialects] database backend of choice). 
- -On Ubuntu/Debian there can be installed with: - - sudo apt-get install `cat debian-packages-deps | tr '\n' ' '` - -[debian-packages-deps]: https://raw.github.com/jansel/opentuner/master/debian-packages-deps -[sqlalchemy-dialects]: http://docs.sqlalchemy.org/en/rel_0_8/dialects/index.html - - -Installation -------------------- -OpenTuner (and dependencies) can be installed with - - sudo pip install opentuner - -or - - pip install --user opentuner - -This will not install any of the example programs. - - -Development installation -------------------- -For development (running OpenTuner out of a git checkout), a list of python -dependencies can be found in [requirements.txt][] these can be installed -system-wide with `pip`. - - sudo apt-get install python-pip - sudo pip install -r requirements.txt - -Or you can use virtual env to create a isolated python environment by running: - - python ./venv-bootstrap.py - -which will create a ./venv/bin/python (./venv/Scripts/python.exe on windows) -with all the required packages installed. - -[requirements.txt]: https://raw.github.com/jansel/opentuner/master/requirements.txt - - -Checking Installation ---------------------- - -Quickly checking that a successful installation has been made, may be performed -by running an example program such as: - - ./examples/rosenbrock/rosenbrock.py - - -Tutorials ---------- - -- [Optimizing Block Matrix Multiplication][gettingstarted] -- [Creating OpenTuner Techniques][technique-tutorial]. - -[gettingstarted]: http://opentuner.org/tutorial/gettingstarted/ -[technique-tutorial]: http://opentuner.org/tutorial/techniques/ - - -Papers ---------- - -- [OpenTuner: An Extensible Framework for Program Autotuning][paper1]. <br> - Jason Ansel, Shoaib Kamil, Kalyan Veeramachaneni, Jonathan Ragan-Kelley, - Jeffrey Bosboom, Una-May O'Reilly, Saman Amarasinghe. <br> - International Conference on Parallel Architectures and Compilation - Techniques. <br> - Edmonton, Canada. August, 2014. [Slides][slides1]. 
[Bibtex][bibtex1]. - -[paper1]: http://groups.csail.mit.edu/commit/papers/2014/ansel-pact14-opentuner.pdf -[bibtex1]: http://groups.csail.mit.edu/commit/bibtex.cgi?key=ansel:pact:2014 -[slides1]: http://groups.csail.mit.edu/commit/papers/2014/ansel-pact14-opentuner-slides.pdf - - -Contributing Code ------------------ - -The preferred way to contribute code to OpenTuner is to fork the project -on github and [submit a pull request][pull-req]. - -[pull-req]: https://www.openshift.com/wiki/github-workflow-for-submitting-pull-requests - - -Support -------- -OpenTuner is supported in part by the United States Department of Energy -[X-Stack][xstack] program as part of [D-TEC][dtec]. - -[xstack]: http://science.energy.gov/ascr/research/computer-science/ascr-x-stack-portfolio/ -[dtec]: http://www.dtec-xstack.org/ - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner.py deleted file mode 100644 index 5977fe7ee5b4780139d2c5a865c8231361cf0f2c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python -# - -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" 
-accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} - - -def extractTotalOverhead(file_name): - - total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - run_result_call_program = self.call_program(run_cmd) - #print run_result_call_program - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - #Result = 
opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - return Result - - - def save_final_config(self, configuration): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", configuration.data) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - 
argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner_piped.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner_piped.py deleted file mode 100644 index 6d46c5762ead377292337c47d045ee5e58322954..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/accuracy_tuner_piped.py +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence import dump_high_confidence_files -from select_top_results import 
select_top_results -from time import sleep - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} -orig_result_dir = "" - - -def extractTotalOverhead(file_name): - - total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def kill(proc_pid): - process = psutil.Process(proc_pid) - for proc in process.children(recursive=True): - proc.kill() - process.kill() - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - FNULL = open(os.devnull, 'wb') - #run_result_call_program = self.call_program(run_cmd) - self.start_process = subprocess.Popen([binary_name, "opentuner_run"]) #, stdout=FNULL); - - try: - os.mkfifo("/tmp/myfifo") - except OSError, e: - print("FIFO exists") - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, 
optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - #run_result_call_program = self.call_program(run_cmd) - - # Using Named Pipes to signal execution to the DNN outer thread - fifo = open("/tmp/myfifo", "w") - fifo.write("start_run") - fifo.close() - - print "Waiting for process to signal back - when done processing one run" - - fifo2 = open("/tmp/myfifo", "r") - fifo2.read() - fifo2.close() - - print "Process Signalled back" - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - - #Result = opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - print "done with one run" - - return Result - - - def save_final_config(self, configuration): - - print "Dumping High Confidence results" - sleep(5) - - # Only dumping files with 95% confidence - dump_high_confidence_files(binary_name, orig_result_dir, accuracy_threshold, 95) - select_top_results(orig_result_dir + "/high_confidence") - - - #self.start_process.kill() - kill(self.start_process.pid) - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", 
configuration.data) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - 
- for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/adddeps.py deleted file mode 100644 index 61fd4757d6a6045346e5cdcd3dfbfcdc00e236fa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/adddeps.py +++ /dev/null @@ -1,5 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner.py deleted file mode 100644 index b8145e179893bc0db2631cf1f7ee0f11bcc9be0e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python -# -# Algorithmic Approximation Tuning -# Purpose: Tunes for Perforation, Sampling, Numerical Precision (FP16) - - -import adddeps - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_promise_confidence_files3 -from measure_confidence2 import getConfidence, getMinAccuracy -from select_top_results import select_top_results -from time import sleep -from pareto_curve import findParetoConfigs - - - - -class TunerData: - def __init__(self): - self.binary_path = "" - self.output_dir = 
"" - self.num_layers = 0 - self.knobs_list = [] - self.knobs_speedup = {} - self.accuracy_threshold = 0 - self.test_id = 0 - self.layer_costs = [] - self.tuning_flags = [] - self.autotuner_runs = 0 - - - -tunerData = TunerData() - - -def readCostFile(file_path): - - layer_costs = [] - f = open(file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("len(layer_costs) = ", layer_costs) - f.close() - - return layer_costs - - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print (accuracy) - return accuracy - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for i in range(tunerData.num_layers): # flag in tunerData.tuning_flags: - flag = tunerData.tuning_flags[i] - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - - -def readLayerKnobs(file_path): - - f = open(file_path, "r") - knobs_list = [] - for x in f: - knobs = [] - vals = x.split(",") - for val in vals: - knobs.append(int(val)) - - knobs_list.append(knobs) - - print ("knobs_list = ", knobs_list) - - return knobs_list - - - -def readKnobConfig(file_path): - - knobs_speedup = {} - f = open(file_path, "r") - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - - print ("knobs_speedup = ", knobs_speedup) - - return knobs_speedup - - - - -def getConfigCost(cfg): - - orig_cost = 0.0 - total_cost = 0.0 - for it in range(tunerData.num_layers): - flag = tunerData.tuning_flags[it] - flag_value = cfg[flag] - op_cost = tunerData.layer_costs[it] - speedup = tunerData.knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - - it += 1 - - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - - return total_cost, speedup - - - -def 
appendTopLine(f_path, accuracy, total_runs, total_comps, speedup): - - f_str = open(f_path, "r").read() - - f_out = open(f_path, "w+") - - f_out.write("total_runs=" + str(total_runs) + "\tconfidence=100.0" + "\tavg_accuracy=" + str(accuracy) + "\tconfig_cost=" + str(total_comps) + "\tspeedup=" + str(speedup) + "\n" ) - f_out.write(f_str) - - f_out.close() - - - - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(tunerData.accuracy_threshold) - input_manager = FixedInputManager(size=tunerData.num_layers) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - for i in range(tunerData.num_layers): - tunerData.tuning_flags.append("flag" + str(i)) - - - #for flag in tunerData.tuning_flags: - for ind in range(tunerData.num_layers): - flag = tunerData.tuning_flags[ind] - manipulator.add_parameter( - EnumParameter(flag, tunerData.knobs_list[ind])) - - print ("ind = ", ind, " len = ", len(tunerData.knobs_list)) - print (tunerData.knobs_list[ind]) - ind += 1 - - return manipulator - - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - global test_id - - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("promise_flags", cfg) - - run_cmd = tunerData.binary_path - print "\nbinary_path = ", run_cmd - - - total_runs = 1 # NOTE: Single run sufficient in Algorithmic Approx Tuner - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL) - p.wait() - - - accuracy = getAccuracy("final_accuracy") - - # getConfigCost returns the cost 
associated with the selected configuration - total_comps, speedup = getConfigCost(cfg) - - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - #Result.accuracy = accuracy - min_accuracy = getMinAccuracy("run_accuracies.txt") - print ("min_accuracy = ", min_accuracy) - Result.accuracy = min_accuracy - - if min_accuracy > tunerData.accuracy_threshold: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - f_path = tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) - shutil.copy('promise_flags', f_path) - - appendTopLine(f_path, accuracy, total_runs, total_comps, speedup) - - f_acc = open(tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) + "_accuracy", "w") - f_acc.write(str(accuracy)) - f_acc.close() - - - tunerData.test_id += 1 - - return Result - - - def save_final_config(self, configuration): - - print "Done with Autotuning Run \n" - sleep(2) - - print "Final configuration", configuration.data - - return - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-layers', type=int, help='num of flags to tune') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--cost-file', help='layer description') - argparser.add_argument('--knobs-config', help='knob settings and ID mapping') - argparser.add_argument('--layer-knobs', help='per-layer Knobs') - - - args = argparser.parse_args() - - tunerData.binary_path = str(args.binary) - tunerData.num_layers = int(args.num_layers) - tunerData.accuracy_threshold = float(args.accuracy) - - - # NOTE: Reading the cost file (with No of ops) to better guide the Autotuner - cost_file_path = args.cost_file - tunerData.layer_costs = 
readCostFile(cost_file_path) - - - tunerData.knobs_list = readLayerKnobs(args.layer_knobs) - tunerData.knobs_speedup = readKnobConfig(args.knobs_config) - - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - tunerData.output_dir = result_dir + "/high_confidence/" - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(tunerData.output_dir): - print("Creating output directory = ", tunerData.output_dir) - os.mkdir(tunerData.output_dir) - - - - ClangFlagsTuner.main(argparser.parse_args()) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner2.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner2.py deleted file mode 100644 index 4ca0062f93441954d3ee0acc0eabf10352e3a76c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/algo_tuner2.py +++ /dev/null @@ -1,339 +0,0 @@ -#!/usr/bin/env python -# -# Algorithmic Approximation Tuning -# Purpose: Tunes for Perforation, Sampling, Numerical Precision (FP16) - - -import adddeps - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_promise_confidence_files4 -from measure_confidence2 import getConfidence, getMinAccuracy -from select_top_results import select_top_results -from time import sleep -from pareto_curve import findParetoConfigs - - - - -class TunerData: - def __init__(self): - self.binary_path = "" - self.output_dir = "" - self.num_layers = 0 - self.knobs_list = [] - self.knobs_speedup = {} - self.accuracy_threshold = 0 - self.test_id = 0 - 
self.layer_costs = [] - self.tuning_flags = [] - self.autotuner_runs = 0 - - - -tunerData = TunerData() - - -orig_result_dir = "" - - -def readCostFile(file_path): - - layer_costs = [] - f = open(file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("len(layer_costs) = ", layer_costs) - f.close() - - return layer_costs - - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print (accuracy) - return accuracy - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for i in range(tunerData.num_layers): # flag in tunerData.tuning_flags: - flag = tunerData.tuning_flags[i] - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - - -def readLayerKnobs(file_path): - - f = open(file_path, "r") - knobs_list = [] - for x in f: - knobs = [] - vals = x.split(",") - for val in vals: - knobs.append(int(val)) - - knobs_list.append(knobs) - - print ("knobs_list = ", knobs_list) - - return knobs_list - - - -def readKnobConfig(file_path): - - knobs_speedup = {} - f = open(file_path, "r") - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - - print ("knobs_speedup = ", knobs_speedup) - - return knobs_speedup - - - - -def getConfigCost(cfg): - - orig_cost = 0.0 - total_cost = 0.0 - for it in range(tunerData.num_layers): - flag = tunerData.tuning_flags[it] - flag_value = cfg[flag] - op_cost = tunerData.layer_costs[it] - speedup = tunerData.knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - - it += 1 - - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - - return total_cost, speedup - - - -def appendTopLine(f_path, accuracy, total_runs, total_comps, speedup): - - f_str = open(f_path, "r").read() - - 
f_out = open(f_path, "w+") - - f_out.write("total_runs=" + str(total_runs) + "\tconfidence=100.0" + "\tavg_accuracy=" + str(accuracy) + "\tconfig_cost=" + str(total_comps) + "\tspeedup=" + str(speedup) + "\n" ) - f_out.write(f_str) - - f_out.close() - - - - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(tunerData.accuracy_threshold) - input_manager = FixedInputManager(size=tunerData.num_layers) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - for i in range(tunerData.num_layers): - tunerData.tuning_flags.append("flag" + str(i)) - - - #for flag in tunerData.tuning_flags: - for ind in range(tunerData.num_layers): - flag = tunerData.tuning_flags[ind] - manipulator.add_parameter( - EnumParameter(flag, tunerData.knobs_list[ind])) - - print ("ind = ", ind, " len = ", len(tunerData.knobs_list)) - print (tunerData.knobs_list[ind]) - ind += 1 - - return manipulator - - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - global test_id - - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("promise_flags", cfg) - - run_cmd = tunerData.binary_path - print "\nbinary_path = ", run_cmd - - - input_size = 5000 - offset = 5000 - - total_runs = 2 # NOTE: Single run sufficient in Algorithmic Approx Tuner - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([run_cmd, str(total_runs), str(tunerData.accuracy_threshold), str(1), str(input_size), str(offset) ], stdout = FNULL) - p.wait() - - #total_runs = 2 # NOTE: Atleast two runs for promise tuner - #FNULL = 
open(os.devnull, 'wb') - #p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL) - #p.wait() - - - accuracy = getAccuracy("final_accuracy") - - # getConfigCost returns the cost associated with the selected configuration - total_comps, speedup = getConfigCost(cfg) - - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - #Result.accuracy = accuracy - min_accuracy = getMinAccuracy("run_accuracies.txt") - print ("min_accuracy = ", min_accuracy) - Result.accuracy = min_accuracy - - if min_accuracy > tunerData.accuracy_threshold: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - f_path = tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) - shutil.copy('promise_flags', f_path) - - appendTopLine(f_path, accuracy, total_runs, total_comps, speedup) - - - tunerData.test_id += 1 - - return Result - - - def save_final_config(self, configuration): - - print "Done with Autotuning Run \n" - sleep(2) - - - #findParetoConfigs(orig_result_dir, layer_costs, accuracy_threshold) - - input_dir = orig_result_dir + "/full_results/" - output_dir = orig_result_dir + "/high_confidence/" - - # Only dumping files with 95% confidence - dump_promise_confidence_files4(tunerData.binary_path, input_dir, output_dir, tunerData.layer_file, tunerData.num_layers, tunerData.accuracy_threshold, tunerData.layer_costs, 95, tunerData.knobs_speedup) - - - print "Final configuration", configuration.data - - return - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-layers', type=int, help='num of flags to tune') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - - argparser.add_argument('--layer-file', help='layer file') - - argparser.add_argument('--cost-file', 
help='layer description') - argparser.add_argument('--knobs-config', help='knob settings and ID mapping') - argparser.add_argument('--layer-knobs', help='per-layer Knobs') - - - args = argparser.parse_args() - - tunerData.binary_path = str(args.binary) - tunerData.num_layers = int(args.num_layers) - tunerData.accuracy_threshold = float(args.accuracy) - - tunerData.layer_file = args.layer_file - - # NOTE: Reading the cost file (with No of ops) to better guide the Autotuner - cost_file_path = args.cost_file - tunerData.layer_costs = readCostFile(cost_file_path) - - - tunerData.knobs_list = readLayerKnobs(args.layer_knobs) - tunerData.knobs_speedup = readKnobConfig(args.knobs_config) - - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - orig_result_dir = result_dir - tunerData.output_dir = result_dir + "/full_results/" - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(tunerData.output_dir): - print("Creating output directory = ", tunerData.output_dir) - os.mkdir(tunerData.output_dir) - - - - ClangFlagsTuner.main(argparser.parse_args()) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/approxhpvm_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/approxhpvm_tuner.py deleted file mode 100644 index 9ae2266bf481a9dd772fd139b375463b35bcd1b9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/approxhpvm_tuner.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from 
opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_high_confidence_files -from select_top_results import select_top_results -from time import sleep - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} -orig_result_dir = "" - - -def copyTunerRuntime(): - tensor_rt_path = os.environ["LLVM_SRC_ROOT"] - if tensor_rt_path == "": - print "LLVM_SRC_ROOT NOT SET" - sys.exit(0) - - print "tensor_rt_path = ", tensor_rt_path - - - - -def extractTotalOverhead(file_name): - - total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def kill(proc_pid): - process = psutil.Process(proc_pid) - for proc in process.children(recursive=True): - proc.kill() - process.kill() - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = 
ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print "binary_name = ", run_cmd - #run_result_call_program = self.call_program(run_cmd) - #print "returned \n\n" - - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen(run_cmd, stdout = FNULL) - p.wait() - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - #Result = opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - print "done with one run" - - return Result - - - def save_final_config(self, configuration): - - print "Dumping High Confidence results" - sleep(5) - - # Only dumping files with 95% confidence - dump_high_confidence_files(binary_name, orig_result_dir, accuracy_threshold, 95) - select_top_results(orig_result_dir + "/high_confidence") - - - #self.start_process.kill() - kill(self.start_process.pid) - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", configuration.data) - 
run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - for i in 
range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/devtuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/devtuner.py deleted file mode 100644 index 4d5da6afb6d95e1372c8dbea00fec07494c46426..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/devtuner.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python -# -# Development-time Tuner with Algorithmic Approximations: -# Approximations: Perforation, Sampling with varying knobs for rate, skip offset - - -import adddeps - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil -import thread - -from select_top_results import select_top_results -from time import sleep -from pareto_curve import findParetoConfigs -import utils - - - -class TunerData: - def __init__(self): - self.binary_path = "" - self.output_dir = "" - self.num_layers = 0 - self.knobs_list = [] - self.knobs_speedup = {} - self.accuracy_threshold = 0 - self.accuracy_slack = 0 - self.test_id = 0 - self.layer_costs = [] - self.tuning_flags = [] - self.autotuner_runs = 0 - self.best_speedup = 1 - self.log_file = "" - - self.use_seed = True - - - -class DevTuner(MeasurementInterface): - - - def initTunerData(self, args): - - self.tunerData.binary_path = str(args.binary) - self.tunerData.num_layers = int(args.num_layers) - self.tunerData.accuracy_threshold = float(args.accuracy) - self.tunerData.accuracy_slack = float(args.accuracy_slack) - - # NOTE: Reading the cost file (with No of ops) to better guide 
the Autotuner - cost_file_path = args.cost_file - self.tunerData.layer_costs = utils.readCostFile(cost_file_path) - - self.tunerData.knobs_list = utils.readLayerKnobs(args.layer_knobs) - self.tunerData.knobs_speedup = utils.readGlobalKnobConfig(args.knobs_config) - self.tunerData.test_id = args.start_id - - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - self.tunerData.output_dir = result_dir + "/high_confidence/" - if not os.path.exists(self.tunerData.output_dir): - print("Creating output directory = ", self.tunerData.output_dir) - os.mkdir(self.tunerData.output_dir) - - - def createFIFO(self): - try: - os.mkfifo("/tmp/opentuner_fifo") - except OSError, e: - print("FIFO exists") - - - def invokeBinary(self, runs): - - print ("\n\n\n\n SPAWNING BINARY *****\n\n***") - run_cmd = self.tunerData.binary_path - print "\nbinary_path = ", run_cmd - FNULL = open(os.devnull, 'wb') - self.process = subprocess.Popen([run_cmd, str(runs)], stdout = FNULL) - - - - def signalPipe(self): - - fifo = open("/tmp/opentuner_fifo", "w") - fifo.write("start_run") - fifo.close() - #print "Waiting for process to signal back - when done processing one run" - - - - def pollOnProcess(self, self2): - - print (" self.piped_execution = ", self.piped_execution, "*** \n") - sleep(5) - - while (not self.escape_poll_thread): - poll = self.process.poll() - #print ("POLLING") - - if poll is not None: # If process aborted, invoke another instance - sleep(6) - - poll = self.process.poll() - if not utils.check_pid(self.process.pid) and poll is not None: # Second check for process existence - self.corrupted_run = True - utils.process_kill(self.process.pid) # Kill existing process if exists - self.invokeBinary(100000) - self.signalPipe() - - - - def waitOnPipe(self): - - fifo2 = open("/tmp/opentuner_fifo", "r") - fifo2.read() - fifo2.close() - - - - def stopProcess(self): - - fifo = 
open("/tmp/opentuner_fifo", "w") - fifo.write("stop_run") - fifo.close() - print "***** Sending Stop Signal ***** " - - - def __init__(self, args): - - #print ("\n\n\n\n\n\******* ARGS[0] = ", args) - - self.tunerData = TunerData() - self.initTunerData(args) - - # Adding knob to use piped execution instead - self.piped_execution = True - self.corrupted_run = False - self.escape_poll_thread = False - - objective = ThresholdAccuracyMinimizeTime(self.tunerData.accuracy_threshold) - input_manager = FixedInputManager(size=self.tunerData.num_layers) - self.configs_list = [] - # initializing tuner related data - - super(DevTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - log_path = self.tunerData.output_dir + "/log_file.txt" - self.log_file = open(log_path, "a+") - - - if self.piped_execution: - self.createFIFO() - self.invokeBinary(100000) - print ("Invoking thread to launch a Polling THREAD ") - sleep(10) - thread.start_new_thread(self.pollOnProcess, (self, )) - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - for i in range(self.tunerData.num_layers): - self.tunerData.tuning_flags.append("flag" + str(i)) - - - for ind in range(self.tunerData.num_layers): - flag = self.tunerData.tuning_flags[ind] - manipulator.add_parameter( - EnumParameter(flag, self.tunerData.knobs_list[ind])) - - print ("ind = ", ind, " len = ", len(self.tunerData.knobs_list)) - print (self.tunerData.knobs_list[ind]) - ind += 1 - - return manipulator - - - - def seed_configurations(self): - """Provide baseline config as seed if model uses seed.""" - baseline_config = {layer: 11 for layer in self.tunerData.tuning_flags} - return [baseline_config] if self.tunerData.use_seed else [] - - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return 
performance - """ - global test_id - - cfg = desired_result.configuration.data - - - print ("cfg = ", cfg) - - # NOTE: creates flags file used by hpvm-tensor-rt - utils.genLayerFlagsFile("promise_flags", cfg, self.tunerData) - - - total_runs = 1 # NOTE: Single run sufficient in Algorithmic Approx Tuner - if not self.piped_execution: - run_cmd = self.tunerData.binary_path - print "\nbinary_path = ", run_cmd - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL) - p.wait() - - - waitSignal = 0 - if self.piped_execution: - self.signalPipe() - waitSignal = self.waitOnPipe() - - accuracy = utils.readAccuracy("final_accuracy") - - if self.corrupted_run == True: - accuracy = self.tunerData.accuracy_slack - 5 # Invalid Run - print ("\n\n\n **** Corrupted Run **** Accuracy = ", accuracy, " --- \n\n\n") - self.corrupted_run = False - - # getConfigCost returns the cost associated with the selected configuration - total_comps, speedup = utils.computeConfigCost(cfg, self.tunerData) - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - - if accuracy > self.tunerData.accuracy_slack: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - f_path = self.tunerData.output_dir + '/' + self.tunerData.binary_path + '_' + str(self.tunerData.test_id) - shutil.copy('promise_flags', f_path) - - utils.addInfoToOutFile(f_path, accuracy, total_runs, total_comps, speedup) - - print ("------ Config Chosen with Accuracy = ", accuracy, " And Predicted Speedup = ", speedup, "\n") - - if speedup > self.tunerData.best_speedup: - self.tunerData.best_speedup = speedup - - - - if self.tunerData.test_id % 100 == 0: - self.log_file.write("** iteration = " + str(self.tunerData.test_id) + \ - " speedup = " + str(self.tunerData.best_speedup) + " \n") - - - - - self.tunerData.test_id += 1 - - return Result - - - - def save_final_config(self, configuration): - - # Indication to 
terminate polling thread - self.escape_poll_thread = True - - if self.piped_execution: - #self.stopProcess() - utils.process_kill(self.process.pid) - print ("Killed hanging process") - - print "Final configuration", configuration.data - - # Close log file - self.log_file.close() - - print "Done with Autotuning Run \n" - sleep(2) - - return - - - - -if __name__ == '__main__': - - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-layers', type=int, help='num of flags to tune') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--accuracy-slack', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--cost-file', help='layer description') - argparser.add_argument('--knobs-config', help='knob settings and ID mapping') - argparser.add_argument('--layer-knobs', help='per-layer Knobs') - # NOTE: needed to have unique file-names across runs - argparser.add_argument('--start-id', type=int, help='start id for naming output files') - - - args = argparser.parse_args() - #devTuner = DevTuner(args) - print ("\n\n\n\n\ -- NOTE --- \n\n") - DevTuner.main(argparser.parse_args()) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/gettingstarted.md b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/gettingstarted.md deleted file mode 100644 index 8a442c5f44d6c501f686125d4468ca642f745920..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/gettingstarted.md +++ /dev/null @@ -1,215 +0,0 @@ ---- -layout: default -title: OpenTuner - Using OpenTuner -permalink: /tutorial/gettingstarted/index.html ---- - -Tutorial: Optimizing Block Matrix Multiplication -================================================ - -This tutorial assumes that you have checked out a 
copy of opentuner. For -guidelines on how to get opentuner set up, refer [here][setup]. - -[setup]: http://opentuner.org/tutorial/setup/ - -Identifying a Program to Autotune ---------------------------------- - -In order to do autotuning, you first need something to autotune. This will -normally be your own program that you want to make either fast or better in -some way. For this tutorial we will use a blocked version of matrix multiply -as an example. We will use opentuner to find the optimal value of the block -size parameter. - -We will autotune the sample code below(based off of modification of code -found [here][matrix-multiply-code]), making sure to take the block size as -a compile time constant to the program. - -[matrix-multiply-code]: http://csapp.cs.cmu.edu/public/waside/waside-blocking.pdf - -Save the sample code below to examples/tutorials/mmm_block.cpp - - #include <stdio.h> - #include <cstdlib> - - #define N 100 - - int main(int argc, const char** argv) - { - - int n = BLOCK_SIZE * (N/BLOCK_SIZE); - int a[N][N]; - int b[N][N]; - int c[N][N]; - int sum=0; - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int j1=0;j1<n;j1+=BLOCK_SIZE) - { - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int i=0;i<n;i++) - { - for(int j=j1;j<j1+BLOCK_SIZE;j++) - { - sum = c[i][j]; - for(int k=k1;k<k1+BLOCK_SIZE;k++) - { - sum += a[i][k] * b[k][j]; - } - c[i][j] = sum; - } - } - } - } - } - return 0; - } - -Creating a New Autotuner with Opentuner ------------------------------------- -Now we need to create a program that uses OpenTuner to optimize the program we just saved. 
- -Save the following code to examples/tutorials/mmm_tuner.py - - #!/usr/bin/env python - # - # Optimize blocksize of apps/mmm_block.cpp - # - # This is an extremely simplified version meant only for tutorials - # - import adddeps # fix sys.path - - import opentuner - from opentuner import ConfigurationManipulator - from opentuner import IntegerParameter - from opentuner import MeasurementInterface - from opentuner import Result - - - class GccFlagsTuner(MeasurementInterface): - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - manipulator.add_parameter( - IntegerParameter('blockSize', 1, 10)) - return manipulator - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - gcc_cmd = 'g++ mmm_block.cpp ' - gcc_cmd += '-DBLOCK_SIZE='+ cfg['blockSize'] - gcc_cmd += ' -o ./tmp.bin' - - compile_result = self.call_program(gcc_cmd) - assert compile_result['returncode'] == 0 - - run_cmd = './tmp.bin' - - run_result = self.call_program(run_cmd) - assert run_result['returncode'] == 0 - - return Result(time=run_result['time']) - - def save_final_config(self, configuration): - """called at the end of tuning""" - print "Optimal block size written to mmm_final_config.json:", configuration.data - self.manipulator().save_to_file(configuration.data, - 'mmm_final_config.json') - - - if __name__ == '__main__': - argparser = opentuner.default_argparser() - GccFlagsTuner.main(argparser.parse_args()) - - -This file consists of several components, each of which will be discussed in further detail below. - -Tuning Programs have a general structure as follows: - - from opentuner import MeasurementInterface - from opentuner import Result - -Create an instance of class GccFlagsTuner, which tunes specified parameters using opentuner. 
- class GccFlagsTuner(MeasurementInterface): - -The manipulator method defines the variable search space by specifying parameters that should be tuned by this instance of GccFlagsTuner - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - manipulator.add_parameter( - IntegerParameter('blockSize', 1, 10)) - return manipulator - -The run method actually runs opentuner under the given configuration and returns the calculated performance under this configuration. In this example, the blockSize parameter to be tuned is input as a compile-time constant that takes on a value within the specified range each time it is run. However, opentuner also supports other methods of specifying these parameters that may be preferred in different use cases. - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - gcc_cmd = 'g++ mmm_block.cpp ' - gcc_cmd += '-DBLOCK_SIZE='+ cfg['blockSize'] - gcc_cmd += ' -o ./tmp.bin' - - compile_result = self.call_program(gcc_cmd) - assert compile_result['returncode'] == 0 - - run_cmd = './tmp.bin' - - run_result = self.call_program(run_cmd) - assert run_result['returncode'] == 0 - - return Result(time=run_result['time']) - -We can actually display the result of running opentuner(the optimal block size for our multiplication problem) by creating a method, save_final_config() in our class. 
This saves a json dictionary of the optimal blockSize parameter found to the file mmm_final_config.json - - def save_final_config(self, configuration): - """called at the end of tuning""" - print "Optimal block size written to mmm_final_config.json:", configuration.data - self.manipulator().save_to_file(configuration.data, - 'mmm_final_config.json') - - if __name__ == '__main__': - argparser = opentuner.default_argparser() - GccFlagsTuner.main(argparser.parse_args()) - -Generating and Viewing Results ------------------------------- - -Run the following command to autotune our program(The --no-dups flag hides warnings about duplicate results and the --stop-after parameter specifies that we are running opentuner for a maximum of 30 seconds): - - python mmm_tuner.py --no-dups --stop-after=30 - -The results of each run configuration will be displayed as follows(output lines are truncated for readability here): - - [ 10s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 19s] INFO opentuner.search.metatechniques: AUCBanditMetaTechniqueA: [('DifferentialEvolutionAlt', 477), ('UniformGreedyMutation', 18), ('NormalGreedyMutation', 5), ('RandomNelderMead', 1)] - [ 20s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 30s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 30s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] 
- Optimal block size written to mmm_final_config.json: {'BLOCK_SIZE': 4} - - -Look up the optimal BlockSize value by inspecting the following created file: - - mmm_final_config.json - -In this example, the output file content was as follows: - - {'BLOCK_SIZE': 4} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence.py deleted file mode 100644 index dd7a050ac8428f99872abd25d1aa2f3d794f7e2b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence.py +++ /dev/null @@ -1,290 +0,0 @@ - -import argparse -import os -import sys -from time import sleep - - -def getAccuracy(file_name): - - if not os.path.exists(file_name): - print("final_accuracy file not found ") - sys.exit(0) - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -total_runs = 40 -skip_lines = 0 - - -def test_func(): - print "test_func" - sys.exit(0) - - -def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): - - #total_runs = 100.0 - successful_runs = 0.0 - total_acc = 0 - - for i in range(int(total_runs)): - - fifo = open("/tmp/myfifo", "w") - fifo.write("start_run") - fifo.close() - - print "Waiting for process to signal back - when done processing one run" - - fifo2 = open("/tmp/myfifo", "r") - fifo2.read() - fifo2.close() - - print "Process Signalled back" - - accuracy = getAccuracy("final_accuracy") - total_acc += accuracy - - if accuracy > accuracy_threshold: - successful_runs += 1 - - confidence = (successful_runs / (total_runs*1.0) ) * 100.0 - print("confidence = ", confidence) - avg_acc = total_acc / total_runs - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - -def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not 
os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - index = 0 - results_str = "" - for x in f: - if index >= skip_lines: - error_knob = int(float(x.split()[1])) - print error_knob - tuner_file.write(str(error_knob) + "\n") - - results_str += x - index += 1 - - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - - if run_confidence > 90: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(results_str) - f2.close() - - conf_result = (run_confidence, avg_accuracy, file_name) - confidence_list.append(conf_result) - - return confidence_list - - - - -def compute_promise_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - config_str = f.read() - tuner_file.write(config_str) - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - - if run_confidence > 90: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(config_str) - f2.close() - - flags_str = config_str.replace('\n', ',') - conf_result = (run_confidence, 
avg_accuracy, file_name, flags_str) - confidence_list.append(conf_result) - - return confidence_list - - - - -def dump_high_confidence_files(binary, result_dir, accuracy, confidence): - - #result_dir = args.result_dir - output_dir = result_dir + "/high_confidence" - result_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - print confidence_list - - # descending sort on confidence - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - print "Dumped Confidence Summary" - - -def processLayerFile(layer_file_path): - - layer_sizes = [] - layer_file = open(layer_file_path, "r") - - for layer_size in layer_file: - try: - size = int(layer_size) - layer_sizes.append(size) - except: - return layer_sizes - - return layer_sizes - - - -def getLayerConfigStr(config_str, layer_sizes, num_flags): - - new_config_str = "" - config_vals = config_str.split(',') - it_count = 0 - for val in config_vals: - if val == "": - continue - - config_val = int(val) - # For FP32 and FP32 values, each tensor op needs to be annotated - if config_val == 8 or config_val == 9: - for i in range(layer_sizes[it_count] - 1): - new_config_str += val + " " - new_config_str += val - if it_count < num_flags - 1: - new_config_str += "," - else: - new_config_str += val - if it_count < num_flags - 1: - new_config_str += "," - - it_count += 1 - - return new_config_str - - -def dump_promise_confidence_files(binary, result_dir, layer_file_path, num_flags, accuracy, confidence): - - #result_dir = args.result_dir - output_dir = result_dir + "/high_confidence" - input_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - 
os.mkdir(output_dir) - - - layer_sizes = processLayerFile(layer_file_path); - print layer_sizes - sleep(3) - - confidence_list = compute_promise_confidence(binary, accuracy, confidence, input_dir, output_dir) - print confidence_list - - # Ascending sort on accuracy - sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) - - promise_file = open(output_dir + "/promise_confs.txt", "w+") - confidence_file = open(output_dir + "/confidence_summary.txt", "w+") - - max_configs = 50 - it_count = 0 - for x in sorted_list: - if x[1] > accuracy and x[0] > confidence: - config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) - promise_file.write(config_str + "\n") - it_count += 1 - if it_count > max_configs: - break - - confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") - - promise_file.close() - confidence_file.close() - - print "Dumped Confidence Summary" - - - - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - argparser.add_argument('--output-dir', help='Directory for storing output directory') - argparser.add_argument('--binary', help='Binary name to run') - argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') - argparser.add_argument('--confidence', type=float, help='Confidence threshold') - - - args = argparser.parse_args() - result_dir = args.result_dir - output_dir = args.output_dir - binary = args.binary - accuracy = args.accuracy - confidence = args.confidence - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - #print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + 
"\n") - - output_file.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence2.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence2.py deleted file mode 100644 index f5998ff3c871fe2db625873dc75fcf8fe4452838..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/measure_confidence2.py +++ /dev/null @@ -1,664 +0,0 @@ - -import argparse -import os -import sys -import subprocess -from time import sleep - - -def getAccuracy(file_name): - - if not os.path.exists(file_name): - print("final_accuracy file not found ") - sys.exit(0) - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -total_runs = 40.0 -fails_allowed = 3 -skip_lines = 0 - - -def test_func(): - print "test_func" - sys.exit(0) - - - -def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): - - successful_runs = 0.0 - unsuccessful_runs = 0.0 - total_acc = 0 - - for i in range(int(total_runs)): - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen(binary_name, stdout = FNULL) - p.wait() - - accuracy = getAccuracy("final_accuracy") - total_acc += accuracy - - if accuracy > accuracy_threshold: - successful_runs += 1 - else: - unsuccessful_runs += 1 - - if unsuccessful_runs > 6: - break - - - confidence = (successful_runs / total_runs) * 100.0 - print("confidence = ", confidence) - avg_acc = total_acc / total_runs - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - - -def getConfidence(accuracy_outfile, acc_threshold): - - f = open(accuracy_outfile, "r") - - total_acc = 0.0 - failed = 0 - it = 0 - - for x in f: - acc = float(x.strip()) - if acc < acc_threshold: - failed += 1 - - total_acc += acc - it += 1 - - conf = (it * 1.0 - failed) / it * 100 - avg_acc = total_acc / it - - return conf, avg_acc - - - -def getMinAccuracy(accuracy_outfile): - - f = open(accuracy_outfile, 
"r") - - total_acc = 0.0 - failed = 0 - it = 0 - - acc_list = [] - for x in f: - acc = float(x.strip()) - acc_list.append(acc) - - return min(acc_list) - - -# NOTE: invokes the binary with the number of runs -def do_multiple_runs2(binary_name, accuracy_threshold, confidence_threshold): - - successful_runs = 0.0 - unsuccessful_runs = 0.0 - total_acc = 0 - - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([binary_name, str(int(total_runs)), str(accuracy_threshold), str(fails_allowed)], stdout = FNULL) - p.wait() - - confidence, avg_acc = getConfidence("run_accuracies.txt", accuracy_threshold) - - print("confidence = ", confidence) - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - - - - -def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - index = 0 - results_str = "" - for x in f: - if index >= skip_lines: - error_knob = int(float(x.split()[1])) - print error_knob - tuner_file.write(str(error_knob) + "\n") - - results_str += x - index += 1 - - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs2(binary_name, accuracy, confidence) - - if run_confidence >= 95: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(results_str) - f2.close() - - conf_result = (run_confidence, avg_accuracy, file_name) - confidence_list.append(conf_result) - - return confidence_list - - - -def dump_high_confidence_files(binary, result_dir, accuracy, confidence): - - #result_dir = args.result_dir - 
output_dir = result_dir + "/high_confidence" - result_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - print "Dumped Confidence Summary" - - - - -def processLayerFile(layer_file_path): - - layer_sizes = [] - layer_file = open(layer_file_path, "r") - - for layer_desc in layer_file: - try: - toks = layer_desc.split(",") - if len(toks) < 2: # Not layer size description - continue - - size = int(toks[1]) - if "NML" in layer_desc: - size = -1 - layer_sizes.append(size) - except: - return layer_sizes - - return layer_sizes - - - -def getLayerConfigStr(config_str, layer_sizes, num_flags): - - new_config_str = "" - config_vals = config_str.split(',') - it_count = 0 - layer_count = 0 - - #for layer_size in val in config_vals: - for layer_depth_size in layer_sizes: - - if layer_depth_size == -1: - new_config_str += "8" - layer_count += 1 - if layer_count < len(layer_sizes): - new_config_str += "," - continue - - val = config_vals[it_count] - if val == "": - continue - - config_val = int(val) - # For FP32 and FP32 values, each tensor op needs to be annotated - if config_val == 8 or config_val == 9: - for i in range(layer_depth_size - 1): - new_config_str += val + " " - new_config_str += val - if layer_count < len(layer_sizes) - 1: - new_config_str += "," - else: - new_config_str += val - if layer_count < len(layer_sizes) - 1: - new_config_str += "," - - it_count += 1 - layer_count += 1 - - - return new_config_str - - - -def compute_promise_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if 
not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("promise_flags", "w+") - - config_str = f.read() - tuner_file.write(config_str) - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - - if run_confidence >= 95: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(config_str) - f2.close() - - flags_str = config_str.replace('\n', ',') - conf_result = (run_confidence, avg_accuracy, file_name, flags_str) - confidence_list.append(conf_result) - - return confidence_list - - - -def getConfigCost(layer_costs, config_str): - - tuning_flags = config_str.split("\n") - - it = 0 - orig_cost = 0.0 - total_cost = 0.0 - for flag in tuning_flags: - - flag_value = -1 - try: - flag_value = int(flag) - except: - continue - - orig_cost += layer_costs[it] - - #print ("orig_cost = ", orig_cost, " flag_value = ", flag_value) - - if flag_value == 11: - total_cost += layer_costs[it] - elif flag_value == 10: - total_cost += (layer_costs[it] / 1.3) - elif flag_value == 8 or flag_value == 9: - total_cost += (layer_costs[it] / 1.6) - elif flag_value < 8: - divisor = 5 + (7 - flag_value) - total_cost += (layer_costs[it] / divisor) - - it += 1 - - speedup = orig_cost * 1.0 / total_cost * 1.0 - - return total_cost, speedup - - - - - -def getConfigCost2(layer_costs, knobs_speedup, config_flags): - - orig_cost = 0.0 - total_cost = 0.0 - for it in range(len(config_flags)): - flag_value = config_flags[it] - op_cost = layer_costs[it] - speedup = knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - - 
it += 1 - - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - - return total_cost, speedup - - - - - -def compute_promise_confidence2(binary_name, accuracy, confidence, layer_costs, - result_dir, output_dir): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("promise_flags", "w+") - - config_str = f.read() - tuner_file.write(config_str) - tuner_file.close() - - #run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - run_confidence, avg_accuracy = do_multiple_runs2(binary_name, accuracy, confidence) - - if run_confidence >= 95: - f2 = open(output_dir + "/" + file_name, "w+") - - config_cost, speedup = getConfigCost(layer_costs, config_str) - - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\t config_cost=" + str(config_cost) + "\t speedup=" + str(speedup) + "\n") - f2.write(config_str) - f2.close() - - flags_str = config_str.replace('\n', ',') - conf_result = (run_confidence, avg_accuracy, file_name, flags_str) - confidence_list.append(conf_result) - - return confidence_list - - - - - -def compute_promise_confidence3(binary_name, accuracy, confidence, layer_costs, - result_dir, output_dir, knobs_speedup): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("promise_flags", "w+") - - config_flags = [] - config_str = "" - it = 0 - for x in f: - - if it > 0: - config_str += 
x - config_flags.append(int(x.strip())) - tuner_file.write(x) - it += 1 - - tuner_file.close() - - - #run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - run_confidence, avg_accuracy = do_multiple_runs2(binary_name, accuracy, confidence) - - if run_confidence >= 95: - f2 = open(output_dir + "/" + file_name, "w+") - - config_cost, speedup = getConfigCost2(layer_costs, knobs_speedup, config_flags) - - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\t config_cost=" + str(config_cost) + "\t speedup=" + str(speedup) + "\n") - f2.write(config_str) - f2.close() - - flags_str = config_str.replace('\n', ',') - conf_result = (run_confidence, avg_accuracy, file_name, flags_str) - confidence_list.append(conf_result) - - return confidence_list - - - - -def dump_promise_confidence_files(binary, result_dir, layer_file_path, - num_flags, accuracy, confidence): - - #result_dir = args.result_dir - output_dir = result_dir + "/high_confidence" - input_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - layer_sizes = processLayerFile(layer_file_path); - print layer_sizes - sleep(2) - - confidence_list = compute_promise_confidence(binary, accuracy, confidence, input_dir, output_dir) - print confidence_list - - # Ascending sort on accuracy - sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) - - promise_file = open(output_dir + "/promise_confs.txt", "w+") - confidence_file = open(output_dir + "/confidence_summary.txt", "w+") - - max_configs = 50 - it_count = 0 - for x in sorted_list: - if x[1] > accuracy and x[0] > confidence: - config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) - promise_file.write(config_str + "\n") - it_count += 1 - if it_count > max_configs: - break - - confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") - - promise_file.close() - confidence_file.close() - - 
print "Dumped Confidence Summary" - - - - - -def dump_promise_confidence_files2(binary, result_dir, layer_file_path, - num_flags, accuracy, layer_costs, confidence): - - - #result_dir = args.result_dir - output_dir = result_dir + "/high_confidence" - input_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - layer_sizes = processLayerFile(layer_file_path); - print layer_sizes - sleep(2) - - confidence_list = compute_promise_confidence2(binary, accuracy, confidence, layer_costs, input_dir, output_dir) - print confidence_list - - # Ascending sort on accuracy - sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) - - promise_file = open(output_dir + "/promise_confs.txt", "w+") - confidence_file = open(output_dir + "/confidence_summary.txt", "w+") - - max_configs = 50 - it_count = 0 - for x in sorted_list: - if x[1] > accuracy and x[0] > confidence: - config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) - promise_file.write(config_str + "\n") - it_count += 1 - if it_count > max_configs: - break - - confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") - - promise_file.close() - confidence_file.close() - - print "Dumped Confidence Summary" - - - - -def dump_promise_confidence_files3(binary, input_dir, output_dir, layer_file_path, - num_flags, accuracy, layer_costs, confidence): - - - #result_dir = args.result_dir - #output_dir = result_dir + "/high_confidence" - #input_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - layer_sizes = processLayerFile(layer_file_path); - print layer_sizes - sleep(2) - - confidence_list = compute_promise_confidence2(binary, accuracy, confidence, layer_costs, input_dir, output_dir) - print confidence_list - - # Ascending sort on accuracy - sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) - - promise_file = open(output_dir + "/promise_confs.txt", "w+") - confidence_file = open(output_dir 
+ "/confidence_summary.txt", "w+") - - max_configs = 50 - it_count = 0 - for x in sorted_list: - if x[1] > accuracy and x[0] > confidence: - config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) - promise_file.write(config_str + "\n") - it_count += 1 - if it_count > max_configs: - break - - confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") - - promise_file.close() - confidence_file.close() - - print "Dumped Confidence Summary" - - - - - -def dump_promise_confidence_files4(binary, input_dir, output_dir, layer_file_path, - num_flags, accuracy, layer_costs, confidence, knobs_speedup): - - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - layer_sizes = processLayerFile(layer_file_path); - print layer_sizes - sleep(2) - - confidence_list = compute_promise_confidence3(binary, accuracy, confidence, layer_costs, input_dir, output_dir, knobs_speedup) - print confidence_list - - # Ascending sort on accuracy - sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) - - promise_file = open(output_dir + "/promise_confs.txt", "w+") - confidence_file = open(output_dir + "/confidence_summary.txt", "w+") - - max_configs = 50 - it_count = 0 - for x in sorted_list: - if x[1] > accuracy and x[0] > confidence: - config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) - promise_file.write(config_str + "\n") - it_count += 1 - if it_count > max_configs: - break - - confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") - - promise_file.close() - confidence_file.close() - - print "Dumped Confidence Summary" - - - - - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - argparser.add_argument('--output-dir', help='Directory for storing output directory') - argparser.add_argument('--binary', help='Binary name to run') - 
argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') - argparser.add_argument('--confidence', type=float, help='Confidence threshold') - - - args = argparser.parse_args() - result_dir = args.result_dir - output_dir = args.output_dir - binary = args.binary - accuracy = args.accuracy - confidence = args.confidence - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - #print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/pareto_curve.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/pareto_curve.py deleted file mode 100644 index db8233994b855317095c94331fba869d9ad79d16..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/pareto_curve.py +++ /dev/null @@ -1,264 +0,0 @@ - - -import os -import shutil -from measure_confidence2 import getConfigCost - - -AL_THRESHOLD = 0.1 - - -class Config: - def __init__(self): - self.avg_accuracy = 0 - self.avg_loss = 0 - self.speedup = 1 - self.fname = "" - self.flags = [] - - - - -def skipFile(fname): - - skip_files = {} - skip_files["confidence_summary.txt"] = 1 - skip_files["promise_confs.txt"] = 1 - - if "accuracy" in fname: - return True - - if fname in skip_files: - return True - else: - return False - - - - -def loadConfigData(result_dir, layer_costs, baseline_accuracy): - - config_arr = [] - - #result_dir += "/promise_tuner/high_confidence/" - file_names = os.listdir(result_dir) - - - for fname in file_names: - if not skipFile(fname): - - fpath = result_dir + fname - config = Config() - f = open(fpath, "r") - - config_str = f.read() - cost, speedup = getConfigCost(layer_costs, 
config_str) - - config.speedup = speedup - config.fname = fname - - fpath2 = fpath + "_accuracy" - f2 = open(fpath2, "r") - acc_str = f2.read().strip() - accuracy = float(acc_str) - - config.avg_accuracy = accuracy - config.avg_loss = baseline_accuracy - accuracy - - config_arr.append(config) - - - return config_arr - - - - -class Configuration: - def __init__(self, name, speedup, energy, accuracy, accuracy_loss): - self.name = name - self.speedup = speedup - self.energy = energy - self.accuracy = accuracy - self.accuracy_loss = accuracy_loss - def __repr__(self): - return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss)) - -configuration_objects = [ - Configuration('conf1', 1.05, 15, 85, 1.2), - Configuration('conf2', 2.51, 12, 83, 1.4), - Configuration('conf3', 2.05, 10, 84, 0.8), -] - -def compute_pareto_points(configurations): - speedupconfigurations = [] - energyconfigurations = [] - #sort configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy > en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - 
en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - # outer while loop variable increment - start_idx = end_idx - return [speedupconfigurations, energyconfigurations] - - -def compute_pareto_points_with_margin(configurations, speedup_band_width, energy_band_width): - speedupconfigurations = [] - energyconfigurations = [] - #sort configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) - - idx_to_sp_conf_dict = {} - idx_to_en_conf_dict = {} - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy < en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - idx_to_sp_conf_dict[start_idx] = len(speedupconfigurations)-1 - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - idx_to_en_conf_dict[start_idx] = len(energyconfigurations)-1 - # outer while loop variable increment - start_idx = end_idx - - # We want to add 
configurations in a band of a certain width around the curves - # not possible to do during contruction, because the quality of the curve would - # deteriorate quickly - - AdjustedSpeedupCurve = [] - AdjustedEnergyCurve = [] - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup + speedup_band_width >= speedupconfigurations[idx_to_sp_conf_dict[start_idx]].speedup: - AdjustedSpeedupCurve.append(sorted_configurations[i]) - if sorted_configurations[i].energy + energy_band_width >= energyconfigurations[idx_to_en_conf_dict[start_idx]].energy: - AdjustedEnergyCurve.append(sorted_configurations[i]) - # outer while loop variable increment - start_idx = end_idx - - return [AdjustedSpeedupCurve, AdjustedEnergyCurve] - - - -def findParetoConfigs(base_dir, layer_costs, accuracy): - - result_dir = base_dir + "/pareto/" - try: - os.mkdir(result_dir) - except: - print "could not create dir" - - input_dir = base_dir + "/full_results/" - #result_dir = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15" - config_arr = loadConfigData(input_dir, layer_costs, accuracy) - - config_list = [] - - it = 0 - for config in config_arr: - config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss) - config_list.append(config) - - - SPEEDUP_BAND_SIZE = 1.0 - ENERGY_BAND_SIZE = 10 - - # No Pareto Selection if list is < 50 configurations - if len(config_list) < 50: - SPEEDUP_BAND_SIZE = 100 # Include all in Pareto Frontier - - - print ("*SPEEDUP_BAND_SIZE = ", SPEEDUP_BAND_SIZE) - - ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - - print ("len(config_list) = ", len(config_list)) - print ("len(ASC) = 
", len(ASC)) - - #print (ASC) - #print (config_list) - - for conf in ASC: - #dst_path = conf.name.replace("full_results", "pareto") - src_path = base_dir + "/full_results/" + conf.name - dst_path = base_dir + "/pareto/" + conf.name - shutil.copy(src_path, dst_path) - - - -if __name__ == "__main__": - - get_pareto_configs("") - - #SC, EC = compute_pareto_points(configuration_objects) - #ASC, AEC = compute_pareto_points_with_margin(configuration_objects, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - #print(SC) - #print(EC) - - #print(ASC) - #print(AEC) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner2.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner2.py deleted file mode 100644 index ca96ff16c2d176b3bb91e213005202634916fc41..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner2.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_promise_confidence_files -from select_top_results import select_top_results -from time import sleep - - -layer_file = "" -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -evaluated_configs = {} -orig_result_dir = "" -gpu_layers = 0 - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - accuracy = 
float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - # NOTE: Skipping first 'gpu_layers' to run on GPU - for flag in tuning_flags[:gpu_layers]: - manipulator.add_parameter( - EnumParameter(flag, [8, 9])) - - for flag in tuning_flags[gpu_layers:]: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("promise_flags", cfg) - - run_cmd = binary_name - print "binary_name = ", run_cmd - #run_result_call_program = self.call_program(run_cmd) - #print "returned \n\n" - - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen(run_cmd, stdout = FNULL) - p.wait() - - - accuracy = getAccuracy("final_accuracy") - total_comps = abs(accuracy_threshold - accuracy) - - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - #if accuracy 
not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('promise_flags', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - print "done with one run" - - return Result - - - def save_final_config(self, configuration): - - print "Dumping High Confidence results \n" - sleep(20) - - # Only dumping files with 95% confidence - dump_promise_confidence_files(binary_name, orig_result_dir, layer_file, num_flags, accuracy_threshold, 95) - #select_top_results(orig_result_dir + "/high_confidence") - - - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune') - argparser.add_argument('--start-range', type=int, help='start range in tuning') - argparser.add_argument('--error-range', type=int, help='range of error values used in tuning') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--layer-file', help='layer description') - argparser.add_argument('--gpu-layers', type=int, help='first N layers to run on GPU') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - start_range = int(args.start_range) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - gpu_layers = args.gpu_layers - - - output_dir = result_dir + 
"/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - #for j in range(error_range): - # flag_ranges.append(j) - - for j in range(start_range, error_range): - flag_ranges.append(j) - - - print("flag_ranges = ", flag_ranges) - - # File with layer description - layer_file = args.layer_file - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner3.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner3.py deleted file mode 100644 index 04ce0d6158819d5cb014411456e1a985fb17b354..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner3.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_promise_confidence_files3 -from measure_confidence2 import getConfidence, getMinAccuracy -from select_top_results import select_top_results -from time import sleep -from pareto_curve import findParetoConfigs - - -layer_file = "" -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -evaluated_configs = {} -orig_result_dir = "" -gpu_layers = 0 - 
-test_id = 0 - -layer_costs = [] - - -def readCostFile(file_path): - - f = open(file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("len(layer_costs) = ", layer_costs) - f.close() - - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - - -def getConfigCost(cfg): - - it = 0 - total_cost = 0.0 - for flag in tuning_flags: - flag_value = cfg[flag] - if flag_value == 11: - total_cost += layer_costs[it] - elif flag_value == 10: - total_cost += (layer_costs[it] / 1.3) - elif flag_value == 8 or flag_value == 9: - total_cost += (layer_costs[it] / 1.6) - elif flag_value < 8: - divisor = 5 + (7 - flag_value) - total_cost += (layer_costs[it] / divisor) - - it += 1 - - return total_cost - - - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - flags_arr = [] - for i in range (8, error_range): - flags_arr.append(i) - - # NOTE: Skipping first 'gpu_layers' to run on GPU - for flag in tuning_flags[:gpu_layers]: - manipulator.add_parameter( - EnumParameter(flag, flags_arr)) - - ind = gpu_layers - for flag in tuning_flags[gpu_layers:]: - if ind in skip_layers: - 
manipulator.add_parameter( - EnumParameter(flag, flags_arr)) - print ("8 ..... 11") - else: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - )) #default is needed, optimizations don't work without it(tried and tested) - print ("1 .... 11") - ind += 1 - - - return manipulator - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return performance - """ - global test_id - - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("promise_flags", cfg) - - run_cmd = binary_name - print "\nbinary_name = ", run_cmd - #run_result_call_program = self.call_program(run_cmd) - - - total_runs = 2 - FNULL = open(os.devnull, 'wb') - #p = subprocess.Popen(run_cmd, stdout = FNULL) - p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL) - p.wait() - - - accuracy = getAccuracy("final_accuracy") - - # Get Confidence for multiple runs - conf, avg_acc = getConfidence("run_accuracies.txt", accuracy_threshold) - - # getConfigCost returns the cost associated with the selected configuration - total_comps = getConfigCost(cfg) - - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - #Result.accuracy = accuracy - min_accuracy = getMinAccuracy("run_accuracies.txt") - print ("min_accuracy = ", min_accuracy) - Result.accuracy = min_accuracy - - # Only pass conf if conf == 100 - if min_accuracy > accuracy_threshold and conf == 100: - print ("conf = ", conf, " avg_acc = ", avg_acc) - #if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('promise_flags', output_dir + '/' + binary_name + '_' + str(test_id)) - - f_acc = open(output_dir + '/' + binary_name + '_' + str(test_id) + "_accuracy", "w") - f_acc.write(str(accuracy)) - f_acc.close() - - - test_id += 1 - - return Result - - - def save_final_config(self, configuration): - - 
print "Dumping High Confidence results \n" - sleep(2) - - - findParetoConfigs(orig_result_dir, layer_costs, accuracy_threshold) - - input_dir = orig_result_dir + "/pareto/" - output_dir = orig_result_dir + "/high_confidence/" - - # Only dumping files with 95% confidence - dump_promise_confidence_files3(binary_name, input_dir, output_dir, layer_file, num_flags, accuracy_threshold, layer_costs, 95) - #select_top_results(orig_result_dir + "/high_confidence") - - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - -error_range = 11 - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune') - argparser.add_argument('--start-range', type=int, help='start range in tuning') - argparser.add_argument('--error-range', type=int, help='range of error values used in tuning') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--layer-file', help='layer description') - argparser.add_argument('--cost-file', help='layer description') - argparser.add_argument('--gpu-layers', type=int, help='first N layers to run on GPU') - argparser.add_argument('--skip-layers', help='layer IDs to run on GPU') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - start_range = int(args.start_range) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - gpu_layers = args.gpu_layers - skip_layers_str = 
args.skip_layers - - skip_layers = [] - layer_ids = skip_layers_str.split("_") - for layer_id in layer_ids: - skip_layers.append(int(layer_id)) - - print ("skip_layers = ", skip_layers) - - # NOTE: Reading the cost file (with No of ops) to better guide the Autotuner - cost_file_path = args.cost_file - readCostFile(cost_file_path) - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - #for j in range(error_range): - # flag_ranges.append(j) - - for j in range(start_range, error_range): - flag_ranges.append(j) - - - print("flag_ranges = ", flag_ranges) - - # File with layer description - layer_file = args.layer_file - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner_piped.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner_piped.py deleted file mode 100644 index cf84c503b09b6b74474cd4730d93aabd34b5ee2a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/promise_tuner_piped.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence import 
dump_promise_confidence_files -from select_top_results import select_top_results -from time import sleep - - -layer_file = "" -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -evaluated_configs = {} -orig_result_dir = "" - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def kill(proc_pid): - process = psutil.Process(proc_pid) - for proc in process.children(recursive=True): - proc.kill() - process.kill() - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - FNULL = open(os.devnull, 'wb') - #run_result_call_program = self.call_program(run_cmd) - self.start_process = subprocess.Popen([binary_name, "opentuner_run"] , stdout=FNULL); - - try: - os.mkfifo("/tmp/myfifo") - except OSError, e: - print("FIFO exists") - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - - """ - Run a given configuration then - return 
performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - #run_result_call_program = self.call_program(run_cmd) - - # Using Named Pipes to signal execution to the DNN outer thread - fifo = open("/tmp/myfifo", "w") - fifo.write("start_run") - fifo.close() - - print "Waiting for process to signal back - when done processing one run" - - fifo2 = open("/tmp/myfifo", "r") - fifo2.read() - fifo2.close() - - print "Process Signalled back" - - accuracy = getAccuracy("final_accuracy") - total_comps = abs(accuracy_threshold - accuracy) - - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - #if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('opentuner_flags', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - print "done with one run" - - return Result - - - def save_final_config(self, configuration): - - print "Dumping High Confidence results \n" - sleep(20) - - # Only dumping files with 95% confidence - dump_promise_confidence_files(binary_name, orig_result_dir, layer_file, num_flags, accuracy_threshold, 95) - #select_top_results(orig_result_dir + "/high_confidence") - - - #self.start_process.kill() - kill(self.start_process.pid) - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune') - argparser.add_argument('--error-range', type=int, help='range of error 
values used in tuning') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--layer-file', help='layer description') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - # File with layer description - layer_file = args.layer_file - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/psnr_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/psnr_tuner.py deleted file mode 100644 index eb126de3aaf15ed3dfb1c30cdf02e28d62d1d939..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/psnr_tuner.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python -# -# Algorithmic Approximation Tuning -# Purpose: Tunes for Perforation, Sampling, Numerical Precision (FP16) - - -import adddeps - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import 
FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence2 import dump_promise_confidence_files3 -from measure_confidence2 import getConfidence, getMinAccuracy -from select_top_results import select_top_results -from time import sleep -from pareto_curve import findParetoConfigs - - - - -class TunerData: - def __init__(self): - self.binary_path = "" - self.output_dir = "" - self.num_layers = 0 - self.knobs_list = [] - self.knobs_speedup = {} - self.accuracy_threshold = 0 - self.test_id = 0 - self.layer_costs = [] - self.tuning_flags = [] - self.autotuner_runs = 0 - - - - -tunerData = TunerData() - - - - -def readCostFile(file_path): - - layer_costs = [] - f = open(file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("len(layer_costs) = ", layer_costs) - f.close() - - return layer_costs - - - -def getPSNR(file_name): - with open(file_name) as f: - try: - raw_str = f.read() - violation, avg_psnr = [float(s) for s in raw_str.split(",")] - except: - return None, None - print (100 - violation, avg_psnr) - return 100 - violation, avg_psnr - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for i in range(tunerData.num_layers): # flag in tunerData.tuning_flags: - flag = tunerData.tuning_flags[i] - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - - -def readLayerKnobs(file_path): - - f = open(file_path, "r") - knobs_list = [] - for x in f: - knobs = [] - vals = x.split(",") - for val in vals: - knobs.append(int(val)) - - knobs_list.append(knobs) - - print ("knobs_list = ", knobs_list) - - return knobs_list - - - -def readKnobConfig(file_path): - - knobs_speedup = {} - f = open(file_path, "r") - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - - print ("knobs_speedup = ", knobs_speedup) - - 
return knobs_speedup - - - - -def getConfigCost(cfg): - - orig_cost = 0.0 - total_cost = 0.0 - for it in range(tunerData.num_layers): - flag = tunerData.tuning_flags[it] - flag_value = cfg[flag] - op_cost = tunerData.layer_costs[it] - speedup = tunerData.knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - - it += 1 - - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - - return total_cost, speedup - - - -def appendTopLine(f_path, accuracy, total_runs, total_comps, speedup): - - f_str = open(f_path, "r").read() - - f_out = open(f_path, "w+") - f_out.write("avg_accuracy=" + str(accuracy) + "\tconfig_cost=" + str(total_comps) + "\tspeedup=" + str(speedup) + "\n" ) - f_out.write(f_str) - - f_out.close() - - - -def dumpAccuracyFile(accuracy): - - f_acc = open(tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) + "_accuracy", "w") - f_acc.write(str(accuracy)) - f_acc.close() - - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(tunerData.accuracy_threshold) - input_manager = FixedInputManager(size=tunerData.num_layers) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - - for i in range(tunerData.num_layers): - tunerData.tuning_flags.append("flag" + str(i)) - - - #for flag in tunerData.tuning_flags: - for ind in range(tunerData.num_layers): - flag = tunerData.tuning_flags[ind] - - manipulator.add_parameter(EnumParameter(flag, tunerData.knobs_list[ind])) - print ("ind = ", ind, " len = ", len(tunerData.knobs_list)) - print (tunerData.knobs_list[ind]) - - ind += 1 - - return manipulator - - - - def run(self, desired_result, 
input, limit): - - """ - Run a given configuration then - return performance - """ - global test_id - - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("promise_flags", cfg) - - run_cmd = tunerData.binary_path - print "\nbinary_path = ", run_cmd - - input_size = 5000 - offset = 5000 - - total_runs = 1 # NOTE: Single run sufficient in Algorithmic Approx Tuner - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([run_cmd], stdout = FNULL) - p.wait() - if p.returncode != 0: - # Something went wrong - sys.stderr.write("Child program returned non-zero; you may want to stop and check.") - - success_rate, avg_psnr = getPSNR("final_accuracy") - - # getConfigCost returns the cost associated with the selected configuration - total_comps, speedup = getConfigCost(cfg) - - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = success_rate - - if success_rate > tunerData.accuracy_threshold: - config_tuple = (total_comps, success_rate, cfg) - self.configs_list.append(config_tuple) - f_path = tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) - shutil.copy('promise_flags', f_path) - - appendTopLine(f_path, avg_psnr, total_runs, total_comps, speedup) - - # dumpAccuracyFile(accuracy) - - - tunerData.test_id += 1 - - return Result - - - def save_final_config(self, configuration): - - print "Done with Autotuning Run \n" - sleep(2) - - print "Final configuration", configuration.data - - return - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='path to target binary') - argparser.add_argument('--num-layers', type=int, help='num of flags to tune') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='result directory') - argparser.add_argument('--cost-file', help='layer 
description') - argparser.add_argument('--knobs-config', help='knob settings and ID mapping') - argparser.add_argument('--layer-knobs', help='per-layer Knobs') - - - args = argparser.parse_args() - - tunerData.binary_path = str(args.binary) - tunerData.num_layers = int(args.num_layers) - tunerData.accuracy_threshold = float(args.accuracy) - - # NOTE: Reading the cost file (with No of ops) to better guide the Autotuner - cost_file_path = args.cost_file - tunerData.layer_costs = readCostFile(cost_file_path) - - tunerData.knobs_list = readLayerKnobs(args.layer_knobs) - tunerData.knobs_speedup = readKnobConfig(args.knobs_config) - - - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - tunerData.output_dir = result_dir + "/high_confidence/" - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(tunerData.output_dir): - print("Creating output directory = ", tunerData.output_dir) - os.mkdir(tunerData.output_dir) - - - - ClangFlagsTuner.main(argparser.parse_args()) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/select_top_results.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/select_top_results.py deleted file mode 100644 index 7ee878e5f8f84f3f56ea982c1f933b2c1a5b914b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/select_top_results.py +++ /dev/null @@ -1,101 +0,0 @@ - - -import argparse -import sys -import os - - -log_index = 9 -linear_index = 10 -quad_index = 11 - -top_k = 10 -skip_lines = 1 - - -def dump_results(sorted_list, k, result_dir, sub_dir): - - ref_dir = result_dir + "/" + sub_dir - if not os.path.exists(ref_dir): - os.mkdir(ref_dir) - - for i in range(min(k, len(sorted_list)) ): - file_name = sorted_list[i][1] - file_name = ref_dir + "/" + file_name + "_rank_" + str(i) - f = open(file_name, "w+") - f.write(str(sorted_list[i][2]) + "\t") - f.write(str(sorted_list[i][3]) + "\t") - 
f.write(str(sorted_list[i][4]) + "\n") - f.write(sorted_list[i][0]) - f.close() - - - - -def select_top_results(result_dir): - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - results_arr = [] - - for file_name in file_names: - - if file_name == "confidence_summary.txt": - continue - - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - log_result = 0.0 - linear_result = 0.0 - quad_result = 0.0 - file_str = "" - - index = 0 - f = open(result_dir + "/" + file_name) - for x in f: - if index >= skip_lines: - words = x.split() - log_result += float(words[log_index]) - linear_result += float(words[linear_index]) - quad_result += float(words[quad_index]) - file_str += x - - index += 1 - - - file_result = (file_str, file_name, log_result, linear_result, quad_result) - results_arr.append(file_result) - - - sorted_list = sorted(results_arr, key = lambda tup: tup[2]) - dump_results(sorted_list, top_k, result_dir, "log") - - sorted_list = sorted(results_arr, key = lambda tup: tup[3]) - dump_results(sorted_list, top_k, result_dir, "linear") - - sorted_list = sorted(results_arr, key = lambda tup: tup[4]) - dump_results(sorted_list, top_k, result_dir, "quad") - - -#def select_top_configuration(result_dir): - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - - args = argparser.parse_args() - result_dir = args.result_dir - - select_top_results(result_dir) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/utils.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/utils.py deleted file mode 100644 index 47429d95991c77c799b809e569a08f8e184da79f..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/utils.py +++ /dev/null @@ -1,157 +0,0 @@ - - -import psutil -from time import sleep -import os - - - -def readCostFile(cost_file_path): - - layer_costs = [] - f = open(cost_file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("-Layer count = ", layer_costs) - f.close() - - return layer_costs - - - -def readAccuracy(accuray_res_file): - - file = open(accuray_res_file, "r") - accuracy_str = file.read() - file.close() - accuracy = 0 - - try: - accuracy = float(accuracy_str) - except: - accuracy = 0 - - print ("*Configuration Accuracy = ", accuracy, "\n\n") - return accuracy - - -def genLayerFlagsFile(flags_file_path, cfg, tunerData): - - f = open(flags_file_path, "w+") - cmd_config = "" - for i in range(tunerData.num_layers): # flag in tunerData.tuning_flags: - flag = tunerData.tuning_flags[i] - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - - -def readLayerKnobs(layer_knobs_path): - - f = open(layer_knobs_path, "r") - knobs_list = [] - for x in f: - knobs = [] - vals = x.split(",") - for val in vals: - knobs.append(int(val)) - - knobs_list.append(knobs) - - print ("\n **** Global Approximation Knobs List = \n", knobs_list) - - return knobs_list - - - -def readGlobalKnobConfig(global_knobs_path): - - knobs_speedup = {} - f = open(global_knobs_path, "r") - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - - print ("knobs_speedup = ", knobs_speedup) - - return knobs_speedup - - - - -def computeConfigCost(cfg, tunerData): - - orig_cost = 0.0 - total_cost = 0.0 - for it in range(tunerData.num_layers): - flag = tunerData.tuning_flags[it] - flag_value = cfg[flag] - op_cost = tunerData.layer_costs[it] - speedup = tunerData.knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - - - - speedup = 
(orig_cost * 1.0) / (total_cost * 1.0) - - return total_cost, speedup - - - -def addInfoToOutFile(f_path, accuracy, total_runs, total_comps, speedup): - - f_str = open(f_path, "r").read() - - f_out = open(f_path, "w+") - - f_out.write("total_runs=" + str(total_runs) + "\tconfidence=100.0" + "\tavg_accuracy=" + \ - str(accuracy) + "\tconfig_cost=" + str(total_comps) + "\tspeedup=" + str(speedup) + "\n" ) - f_out.write(f_str) - - f_out.close() - - - -def check_pid(pid): - - """ Check For the existence of a unix pid. """ - try: - os.kill(pid, 0) - except OSError: - return False - else: - return True - - - -def process_kill(proc_pid): - - if not check_pid(proc_pid): - return # Return if process does not exist - - process = psutil.Process(proc_pid) - - try: - for proc in process.children(recursive=True): - proc.kill() - process.kill() - - print ("\n\n\n\n\n\ %%%%% Killed Process \n\n\n\n") - - except: - print ("\n\n\n\n PROCESS NOT KILLED ------- \n\n\n\n\n\n\n") - - #sleep(20) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/debian-packages-deps b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/debian-packages-deps deleted file mode 100644 index ea49289a875cfe80df1de02307e03f7791c00adf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/debian-packages-deps +++ /dev/null @@ -1,9 +0,0 @@ -build-essential -git -gnuplot -libfreetype6-dev -libpng-dev -libsqlite3-dev -python-dev -python-pip -sqlite3 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/Makefile b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/Makefile deleted file mode 100644 index 1c028b3a91e5750dc927f6a865923ca2e9ac141a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. 
-SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext - -help: - @echo "Please use \`make <target>' where <target> is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " 
changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenTuner.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenTuner.qhc" - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." 
- @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/OpenTuner" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/OpenTuner" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/rtd-requirements.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/rtd-requirements.txt deleted file mode 100644 index e30d149ed5e3356ff54d915b556dfaed0dfb6148..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/rtd-requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -argparse>=1.2.1 -django==1.6.1 -fn>=0.2.12 -SQLAlchemy>=0.8.2 -virtualenv==1.9.1 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/conf.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/conf.py deleted file mode 100644 index a27fabf403e0e7f6081d906819167e94eb236b61..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/conf.py +++ /dev/null @@ -1,261 +0,0 @@ -# -*- coding: utf-8 -*- -# -# OpenTuner documentation build configuration file, created by -# sphinx-quickstart on Sat Jan 3 04:13:12 2015. -# -# This file is execfile()d with the current directory set to its -# containing dir. 
-# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('../..')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.pngmath', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'OpenTuner' -copyright = u'2015, Jason Ansel' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '0.0' -# The full version, including alpha/beta/rc tags. -release = '0.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. 
-#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# "<project> v<release> documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). 
-#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'OpenTunerdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - ('index', 'OpenTuner.tex', u'OpenTuner Documentation', - u'Jason Ansel', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'opentuner', u'OpenTuner Documentation', - [u'Jason Ansel'], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'OpenTuner', u'OpenTuner Documentation', - u'Jason Ansel', 'OpenTuner', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/index.rst b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/index.rst deleted file mode 100644 index 48f7468982f559d60ef98736539567fa0a320ec3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/index.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. OpenTuner documentation master file, created by - sphinx-quickstart on Sat Jan 3 04:13:12 2015. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to OpenTuner's documentation! -===================================== -This is still under construction - - -Contents: - -.. toctree:: - :maxdepth: 2 - - params - techniques - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/params.rst b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/params.rst deleted file mode 100644 index b8d08cd300d466c5be43dabfa0e4db1abd12182b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/params.rst +++ /dev/null @@ -1,339 +0,0 @@ -.. 
currentmodule:: opentuner.search.manipulator - -**************** -Parameters -**************** - -This will be an overview of parameters in OpenTuner. - -Each Parameter instance is created with a name. Most methods in parameters operate on configurations, dict-like objects spawned by the ConfigurationManipulator. Configurations contain values corresponding to a collection of instances of named parameters. - -A Parameter’s methods may mutate the value in a configuration corresponding to the name of the particular parameter instance. These methods are called operators. - -============================== -Parameter Types and Operators -============================== - -Each parameter has a set of operators. These operators take in a set of parent configurations and mutate the corresponding parameter value in the first configuration according to the parent values. Operators form the set of available transformations for search techniques to generate new configurations to test. - -Operator methods can be identified by the prefix 'op#_', where # is the number of required input configurations. The prefix 'opn\_' specifies an arbitrary number of input configurations, as a list. The first argument into an operator is always the configuration that will be mutated. This is followed by the required parent configurations, then any required arguments, and finally optional arguments. - -Any operators defined for a Parameter are inherited by its subclasses. - ------------------ -Parameter ------------------ -This is an abstract base interface for parameters. - -.. autoclass:: Parameter - - .. automethod:: op1_randomize - - .. automethod:: op3_swarm - - .. automethod:: op4_set_linear - - .. automethod:: opn_stochastic_mix - - -------------------------- -Primitive Parameter -------------------------- -.. 
autoclass:: PrimitiveParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`Parameter.op1_randomize`, - :meth:`Parameter.op3_swarm`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_normal_mutation - - **This paragraph can have examples for the above operator** - - .. automethod:: op4_set_linear - - ------------------------- -Numeric Parameter ------------------------- -.. autoclass:: NumericParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`Parameter.op3_swarm`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - .. automethod:: op1_scale - - .. automethod:: op3_difference - - .. automethod:: opn_sum - - ------------------------- -Integer Parameter ------------------------- -.. autoclass:: IntegerParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - .. automethod:: op3_swarm - - ------------------------- -Float Parameter ------------------------- -.. autoclass:: FloatParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - .. automethod:: op3_swarm - - ------------------------- -ScaledNumericParameter ------------------------- -.. 
autoclass:: ScaledNumericParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`Parameter.op3_swarm`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - ------------------------- -LogIntegerParameter ------------------------- -.. autoclass:: LogIntegerParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`FloatParameter.op3_swarm`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - ------------------------- -LogFloatParameter ------------------------- -.. autoclass:: LogFloatParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`FloatParameter.op3_swarm`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - ------------------------- -PowerOfTwoParameter ------------------------- -.. autoclass:: LogFloatParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`PrimitiveParameter.op1_normal_mutation`, - :meth:`NumericParameter.op1_randomize`, - :meth:`NumericParameter.op1_scale`, - :meth:`NumericParameter.op3_difference`, - :meth:`IntegerParameter.op3_swarm`, - :meth:`PrimitiveParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix`, - :meth:`NumericParameter.opn_sum` - - ------------------------- -Complex Parameter ------------------------- -.. 
autoclass:: ComplexParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`Parameter.op3_swarm`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - .. automethod:: op4_set_linear - - ------------------------- -Boolean Parameter ------------------------- -.. autoclass:: BooleanParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`Parameter.op3_swarm`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_flip - - .. automethod:: op1_randomize - - .. automethod:: op3_swarm - --------------------------- -Switch Parameter --------------------------- -.. autoclass:: SwitchParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`Parameter.op3_swarm`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - --------------------------- -Enum Parameter --------------------------- -.. autoclass:: EnumParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`Parameter.op3_swarm`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - --------------------------- -Permutation Parameter --------------------------- -.. autoclass:: PermutationParameter - :show-inheritance: - - *Inherited Operators:* - - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - .. automethod:: op1_small_random_change - - .. automethod:: op2_random_swap - - .. automethod:: op2_random_invert - - .. automethod:: op3_cross - - .. automethod:: op3_cross_PX - - .. automethod:: op3_cross_PMX - - .. automethod:: op3_cross_CX - - .. automethod:: op3_cross_OX1 - - .. automethod:: op3_cross_OX3 - - .. automethod:: op3_swarm - --------------------------- -Array --------------------------- -.. 
autoclass:: Array - :show-inheritance: - - *Inherited Operators:* - - :meth:`ComplexParameter.op1_randomize`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op3_cross - - .. automethod:: op3_swarm - - --------------------------- -BooleanArray --------------------------- -.. autoclass:: BooleanArray - :show-inheritance: - - *Inherited Operators:* - - :meth:`Array.op3_cross`, - :meth:`Array.op3_swarm`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - .. automethod:: op3_swarm_parallel - - --------------------------- -FloatArray --------------------------- -.. autoclass:: FloatArray - :show-inheritance: - - *Inherited Operators:* - - :meth:`Array.op3_cross`, - :meth:`Array.op3_swarm`, - :meth:`ComplexParameter.op4_set_linear`, - :meth:`Parameter.opn_stochastic_mix` - - .. automethod:: op1_randomize - - .. automethod:: op3_swarm_parallel - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/techniques.rst b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/techniques.rst deleted file mode 100644 index 3bbebddedbd9c4b999fa8a4f58244ee482ffe673..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/docs/source/techniques.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. currentmodule:: opentuner.search.composableevolutionarytechniques - -******************** -Current Techniques -******************** - -OpenTuner has a library of existing search techniques. - -================================= -Composable Search Techniques -================================= - -A ComposableEvolutionaryTechnique allows for composition between the search technique and any operators. 
Creating a ComposableEvolutionaryTechnique requires implementing 3 methods: - - * :meth:`minimum_number_of_parents <ComposableEvolutionaryTechnique.minimum_number_of_parents>` - * :meth:`get_parents <ComposableEvolutionaryTechnique.get_parents>` - * :meth:`update_population <ComposableEvolutionaryTechnique.update_population>` - -Additionally, the following methods may be overridden for further customization - - * :meth:`make_population_member <ComposableEvolutionaryTechnique.make_population_member>` - * :meth:`select_parameters <ComposableEvolutionaryTechnique.select_parameters>` - * :meth:`get_default_operator <ComposableEvolutionaryTechnique.get_default_operator>` - -The following methods are useful when choosing parents or updating the population: - - * :meth:`lt <ComposableEvolutionaryTechnique.lt>` - * :meth:`lte <ComposableEvolutionaryTechnique.lte>` - * :meth:`get_global_best_configuration <ComposableEvolutionaryTechnique.get_global_best_configuration>` - -A ComposableEvolutionaryTechnique will yields configurations generated by successive iterations of applying operators on the configurations returned by :meth:`get_parents <ComposableEvolutionaryTechnique.get_parents>` and updating the population with the new configuration through :meth:`update_population <ComposableEvolutionaryTechnique.update_population>` - -.. autoclass:: ComposableEvolutionaryTechnique - - .. automethod:: minimum_number_of_parents - - .. automethod:: get_parents - - .. automethod:: update_population - - .. automethod:: make_population_member - - .. automethod:: select_parameters - - .. automethod:: get_default_operator - - .. automethod:: lt - - .. automethod:: lte - - .. 
automethod:: get_global_best_configuration diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/.gitignore deleted file mode 100644 index f525a6259ba8a55dbb66c2eb9b3489e9784ae523..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*-journal -stats -opentuner.log -opentuner.db diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/.gitignore deleted file mode 100644 index fab2c2b13c5afd35380ae5cf8f4317d2acc58a06..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -tmp.bin -cc_flags.json -gccflags_final_config.cmd -gccflags_final_config.json -cc_params.json diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/.gitignore deleted file mode 100644 index f06d3e01a2bedbfadb4c05ad181eb8745ac2f608..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/.gitignore +++ /dev/null @@ -1 +0,0 @@ -fft.c diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/matrixmultiply.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/matrixmultiply.cpp deleted file mode 100644 index 9989ffbf4a2ff1f6dffcfbdcab1b7e3f3116c7a1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/matrixmultiply.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// based on: http://blogs.msdn.com/b/xiangfan/archive/2009/04/28/optimize-your-code-matrix-multiplication.aspx -// by Xiang Fan - -#include <algorithm> -#include <iostream> - -#define N 512 - - -template<class T> -T** make_test_matrix() { - T** data = new T*[N]; - for (int i = 0; i < N; i++) { - data[i] = new T[N]; - } - for(int i = 0; i < N; i++) { - for(int j = 0; j < N; j++) { - data[i][j] = (int) i * j; - } - } - return data; -} - - - -template<typename T> -void Transpose(int size, T** __restrict__ m) -{ - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - std::swap(m[i][j], m[j][i]); - } - } -} -template<typename T> -void SeqMatrixMult3(int size, T** __restrict__ m1, T** __restrict__ m2, - T** __restrict__ result) { - Transpose(size, m2); - for (int i = 0; i < size; i++) { - for (int j = 0; j < size; j++) { - T c = 0; - for (int k = 0; k < size; k++) { - c += m1[i][k] * m2[j][k]; - } - result[i][j] = c; - } - } - Transpose(size, m2); -} - - -template<typename T> -void test() { - T** a = make_test_matrix<T>(); - T** b = make_test_matrix<T>(); - T** c = make_test_matrix<T>(); - SeqMatrixMult3(N, a, b, c); - - - T avg = 0; - for(int i = 0; i < N; i++) { - for(int j = 0; j < N; j++) { - avg += c[i][j] / (T)(N*N); - } - } - // print out average so caller can check answer - std::cout << avg << std::endl; -} - - -int main(int argc, const char** argv) { - test<float>(); - return 0; -} - - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/raytracer.cpp 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/raytracer.cpp deleted file mode 100644 index 3cb1192c6a0d9cbd3502186dc391efa71d5cde18..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/raytracer.cpp +++ /dev/null @@ -1,277 +0,0 @@ -/* - A very basic raytracer example. - Copyright (C) 2012 www.scratchapixel.com - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- - - changes 02/04/13: fixed flag in ofstream causing a bug under Windows, - added default values for M_PI and INFINITY - - changes 24/05/13: small change to way we compute the refraction direction - vector (eta=ior if we are inside and 1/ior if we are outside the sphere) - - Compile with the following command: c++ -o raytracer -O3 -Wall raytracer.cpp - -*/ - -#include <cstdlib> -#include <cstdio> -#include <cmath> -#include <fstream> -#include <vector> -#include <iostream> -#include <cassert> - -#if defined(__linux__) || defined(__APPLE__) - // "Compiled for Linux -#else - // Windows doesn't define these values by default, Linux does - #define M_PI 3.141592653589793 - #define INFINITY 1e8 -#endif - -template<typename T> -class Vec3 -{ -public: - T x, y, z; - Vec3() : x(T(0)), y(T(0)), z(T(0)) {} - Vec3(T xx) : x(xx), y(xx), z(xx) {} - Vec3(T xx, T yy, T zz) : x(xx), y(yy), z(zz) {} - Vec3& normalize() - { - T nor2 = length2(); - if (nor2 > 0) { - T invNor = 1 / sqrt(nor2); - x *= invNor, y *= invNor, z *= invNor; - } - return *this; - } - Vec3<T> operator * (const T &f) const { return Vec3<T>(x * f, y * f, z * f); } - Vec3<T> operator * (const Vec3<T> &v) const { return Vec3<T>(x * v.x, y * v.y, z * v.z); } - T dot(const Vec3<T> &v) const { return x * v.x + y * v.y + z * v.z; } - Vec3<T> operator - (const Vec3<T> &v) const { return Vec3<T>(x - v.x, y - v.y, z - v.z); } - Vec3<T> operator + (const Vec3<T> &v) const { return Vec3<T>(x + v.x, y + v.y, z + v.z); } - Vec3<T>& operator += (const Vec3<T> &v) { x += v.x, y += v.y, z += v.z; return *this; } - Vec3<T>& operator *= (const Vec3<T> &v) { x *= v.x, y *= v.y, z *= v.z; return *this; } - Vec3<T> operator - () const { return Vec3<T>(-x, -y, -z); } - T length2() const { return x * x + y * y + z * z; } - T length() const { return sqrt(length2()); } - friend std::ostream & operator << (std::ostream &os, const Vec3<T> &v) - { - os << "[" << v.x << " " << v.y << " " << v.z << "]"; - return os; - } -}; - 
-template<typename T> -class Sphere -{ -public: - Vec3<T> center; /// position of the sphere - T radius, radius2; /// sphere radius and radius^2 - Vec3<T> surfaceColor, emissionColor; /// surface color and emission (light) - T transparency, reflection; /// surface transparency and reflectivity - Sphere(const Vec3<T> &c, const T &r, const Vec3<T> &sc, - const T &refl = 0, const T &transp = 0, const Vec3<T> &ec = 0) : - center(c), radius(r), radius2(r * r), surfaceColor(sc), emissionColor(ec), - transparency(transp), reflection(refl) - {} - // compute a ray-sphere intersection using the geometric solution - bool intersect(const Vec3<T> &rayorig, const Vec3<T> &raydir, T *t0 = NULL, T *t1 = NULL) const - { - Vec3<T> l = center - rayorig; - T tca = l.dot(raydir); - if (tca < 0) return false; - T d2 = l.dot(l) - tca * tca; - if (d2 > radius2) return false; - T thc = sqrt(radius2 - d2); - if (t0 != NULL && t1 != NULL) { - *t0 = tca - thc; - *t1 = tca + thc; - } - - return true; - } -}; - -#define MAX_RAY_DEPTH 5 - -template<typename T> -T mix(const T &a, const T &b, const T &mix) -{ - return b * mix + a * (T(1) - mix); -} - -// This is the main trace function. It takes a ray as argument (defined by its origin -// and direction). We test if this ray intersects any of the geometry in the scene. -// If the ray intersects an object, we compute the intersection point, the normal -// at the intersection point, and shade this point using this information. -// Shading depends on the surface property (is it transparent, reflective, diffuse). -// The function returns a color for the ray. If the ray intersects an object that -// is the color of the object at the intersection point, otherwise it returns -// the background color. 
-template<typename T> -Vec3<T> trace(const Vec3<T> &rayorig, const Vec3<T> &raydir, - const std::vector<Sphere<T> *> &spheres, const int &depth) -{ - //if (raydir.length() != 1) std::cerr << "Error " << raydir << std::endl; - T tnear = INFINITY; - const Sphere<T> *sphere = NULL; - // find intersection of this ray with the sphere in the scene - for (unsigned i = 0; i < spheres.size(); ++i) { - T t0 = INFINITY, t1 = INFINITY; - if (spheres[i]->intersect(rayorig, raydir, &t0, &t1)) { - if (t0 < 0) t0 = t1; - if (t0 < tnear) { - tnear = t0; - sphere = spheres[i]; - } - } - } - // if there's no intersection return black or background color - if (!sphere) return Vec3<T>(2); - Vec3<T> surfaceColor = 0; // color of the ray/surfaceof the object intersected by the ray - Vec3<T> phit = rayorig + raydir * tnear; // point of intersection - Vec3<T> nhit = phit - sphere->center; // normal at the intersection point - nhit.normalize(); // normalize normal direction - // If the normal and the view direction are not opposite to each other - // reverse the normal direction. That also means we are inside the sphere so set - // the inside bool to true. Finally reverse the sign of IdotN which we want - // positive. 
- T bias = 1e-4; // add some bias to the point from which we will be tracing - bool inside = false; - if (raydir.dot(nhit) > 0) nhit = -nhit, inside = true; - if ((sphere->transparency > 0 || sphere->reflection > 0) && depth < MAX_RAY_DEPTH) { - T facingratio = -raydir.dot(nhit); - // change the mix value to tweak the effect - T fresneleffect = mix<T>(pow(1 - facingratio, 3), 1, 0.1); - // compute reflection direction (not need to normalize because all vectors - // are already normalized) - Vec3<T> refldir = raydir - nhit * 2 * raydir.dot(nhit); - refldir.normalize(); - Vec3<T> reflection = trace(phit + nhit * bias, refldir, spheres, depth + 1); - Vec3<T> refraction = 0; - // if the sphere is also transparent compute refraction ray (transmission) - if (sphere->transparency) { - T ior = 1.1, eta = (inside) ? ior : 1 / ior; // are we inside or outside the surface? - T cosi = -nhit.dot(raydir); - T k = 1 - eta * eta * (1 - cosi * cosi); - Vec3<T> refrdir = raydir * eta + nhit * (eta * cosi - sqrt(k)); - refrdir.normalize(); - refraction = trace(phit - nhit * bias, refrdir, spheres, depth + 1); - } - // the result is a mix of reflection and refraction (if the sphere is transparent) - surfaceColor = (reflection * fresneleffect + - refraction * (1 - fresneleffect) * sphere->transparency) * sphere->surfaceColor; - } - else { - // it's a diffuse object, no need to raytrace any further - for (unsigned i = 0; i < spheres.size(); ++i) { - if (spheres[i]->emissionColor.x > 0) { - // this is a light - Vec3<T> transmission = 1; - Vec3<T> lightDirection = spheres[i]->center - phit; - lightDirection.normalize(); - for (unsigned j = 0; j < spheres.size(); ++j) { - if (i != j) { - T t0, t1; - if (spheres[j]->intersect(phit + nhit * bias, lightDirection, &t0, &t1)) { - transmission = 0; - break; - } - } - } - surfaceColor += sphere->surfaceColor * transmission * - std::max(T(0), nhit.dot(lightDirection)) * spheres[i]->emissionColor; - } - } - } - - return surfaceColor + 
sphere->emissionColor; -} - -// Main rendering function. We compute a camera ray for each pixel of the image -// trace it and return a color. If the ray hits a sphere, we return the color of the -// sphere at the intersection point, else we return the background color. -template<typename T> -unsigned int render(const std::vector<Sphere<T> *> &spheres) -{ - unsigned width = 640, height = 480; - Vec3<T> *image = new Vec3<T>[width * height], *pixel = image; - T invWidth = 1 / T(width), invHeight = 1 / T(height); - T fov = 30, aspectratio = width / T(height); - T angle = tan(M_PI * 0.5 * fov / T(180)); - // Trace rays - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x, ++pixel) { - T xx = (2 * ((x + 0.5) * invWidth) - 1) * angle * aspectratio; - T yy = (1 - 2 * ((y + 0.5) * invHeight)) * angle; - Vec3<T> raydir(xx, yy, -1); - raydir.normalize(); - *pixel = trace(Vec3<T>(0), raydir, spheres, 0); - } - } -#if 0 - // Save result to a PPM image (keep these flags if you compile under Windows) - std::ofstream ofs("./untitled.ppm", std::ios::out | std::ios::binary); - ofs << "P6\n" << width << " " << height << "\n255\n"; - for (unsigned i = 0; i < width * height; ++i) { - ofs << (unsigned char)(std::min(T(1), image[i].x) * 255) << - (unsigned char)(std::min(T(1), image[i].y) * 255) << - (unsigned char)(std::min(T(1), image[i].z) * 255); - } - ofs.close(); -#endif - - unsigned int bad_hash = 0; - for (unsigned i = 0; i < width * height; ++i) { - bad_hash = bad_hash*31 + (unsigned int)(std::min(T(1), image[i].x) * 255); - bad_hash = bad_hash*31 + (unsigned int)(std::min(T(1), image[i].y) * 255); - bad_hash = bad_hash*31 + (unsigned int)(std::min(T(1), image[i].z) * 255); - } - delete [] image; - - return bad_hash; -} - -volatile unsigned int dont_optimize_me; - -int main(int argc, char **argv) { - srand48(13); - std::vector<Sphere<float> *> spheres; - // position, radius, surface color, reflectivity, transparency, emission color - 
spheres.push_back(new Sphere<float>(Vec3<float>(0, -10004, -20), 10000, Vec3<float>(0.2), 0, 0.0)); - spheres.push_back(new Sphere<float>(Vec3<float>(0, 0, -20), 4, Vec3<float>(1.00, 0.32, 0.36), 1, 0.5)); - spheres.push_back(new Sphere<float>(Vec3<float>(5, -1, -15), 2, Vec3<float>(0.90, 0.76, 0.46), 1, 0.0)); - spheres.push_back(new Sphere<float>(Vec3<float>(5, 0, -25), 3, Vec3<float>(0.65, 0.77, 0.97), 1, 0.0)); - spheres.push_back(new Sphere<float>(Vec3<float>(-5.5, 0, -15), 3, Vec3<float>(0.90, 0.90, 0.90), 1, 0.0)); - // light - spheres.push_back(new Sphere<float>(Vec3<float>(0, 20, -30), 3, Vec3<float>(0), 0, 0, Vec3<float>(3))); - - dont_optimize_me = render<float>(spheres); - __asm__ __volatile__ ("" ::: "memory"); // memory barrier - if(dont_optimize_me == 0x4bd7c0e0) { - //printf("CORRECT\n"); - } else { - printf("ERROR: WRONG ANSWER\n"); - } - - while (!spheres.empty()) { - Sphere<float> *sph = spheres.back(); - spheres.pop_back(); - delete sph; - } - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/tsp_ga.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/tsp_ga.cpp deleted file mode 100644 index 0e8f232cb099d37facffa0440b41dfd57efd4b2e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/apps/tsp_ga.cpp +++ /dev/null @@ -1,548 +0,0 @@ -// -// based on: https://bitbucket.org/knordkvist/tsp-ga/overview -// by Kristoffer Nordkvist -// -#include <algorithm> -#include <assert.h> -#include <iostream> -#include <limits> -#include <math.h> -#include <sstream> -#include <stdio.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdlib.h> -#include <string.h> -#include <string> -#include <time.h> - -class TSP -{ - public: - TSP(const double crossoverProbability, const double mutationProbability); - - /* The constants used in this project */ - static const unsigned int chromosones = 30, cities = 20, xMin = 0, xMax = 1000, 
yMin = 0, yMax = 500; - - /* Generate a random population of chromosones */ - void randomPopulation(); - - /* Create a new population using crossover and mutation */ - void nextPopulation(); - - /* Returns the fitness of the best chromosone */ - double getBestFitness() const; - - /* Returns a string representation of the best path */ - std::string getBestPathString() const; - - /* Returns the total distance of the best chromosone path */ - double getLowestTotalDistance() const; - - /* Returns the populations average length */ - double getAverageDistance() const; - private: - const double crossoverProbability, mutationProbability; - - /* Gets the total distance of the supplied path */ - double totalDistance(int const * const chromosone) const; - - /* The coordinates for each city, (x,y) for the first city is found in (citiesX[0], citiesY[0]) */ - double citiesX[cities], citiesY[cities]; - - /* The chromosone containing the shortest path */ - int *bestChromosone; - - /* Contains the current population of chromosones */ - int (* solutions)[cities], - /* The two chromosones with the best fitness functions */ - //bestChromosone1[cities], bestChromosone2[cities], - /* Used to store the new chromosones when creating a new population */ - (* newPopulation)[cities]; - - /* Returns a random double r, 0 <= r <= max */ - static double randomInclusive(const double max); - - /* Returns a random double r, 0 <= r < max */ - static double randomExclusive(const double max); - - /* True if the two chromosones represent the same path */ - static bool areChromosonesEqual(int const * const chromosoneA, int const * const chromosoneB); - - /* Evaluate the fitness the supplied chromosone */ - double evaluateFitness(const int * const chromosone) const; - - /* Selects a chromosone from the current population using Roulette Wheel Selection. - * Using the algorithm described in http://www.obitko.com/tutorials/genetic-algorithms/selection.php. 
- */ - int * rouletteSelection(double const * const fitness) const; - - /* Replace the element at offspringIndex with the first element found in other that does not exist in offspringToRepair */ - void repairOffspring(int * const offspringToRepair, int missingIndex, const int * const other); - - /* Might swap one gene with another, depending on the mutation probability */ - void mutate(int * const chromosone); - - /* Cross over the parents to form new offspring using Multi-Point Crossover, collisions are handled as shown in lecture 5. - * The chromosones might be a copy of their parents, depending on the crossover probability. - */ - void crossover(const int * const parentA, const int * const parentB, int * const offspringA, int * const offspringB); - - /* Checks if the supplied chromosone is in newPopulation */ - bool hasDuplicate(const int * const chromosone, size_t populationCount); - - /* Copies the supplied chromosone to the new population */ - void copyToNewPopulation(const int * const chromosone, size_t index); - - /* Make the chromosone represent a path, which is chosen by random */ - static void setRandomPath(int * const chromosone); -}; - -using namespace std; - -TSP::TSP(double crossoverProbability, double mutationProbability) : crossoverProbability(crossoverProbability), - mutationProbability(mutationProbability), solutions(new int[chromosones][cities]), newPopulation(new int[chromosones][cities]) -{ - /* Seed the random number generator */ - //srand((unsigned int)time(NULL)); - srand(17); - /* Use the same number to generate a specific sequence */ - //srand(0); - /* Set random coordinates */ - for(size_t coordinateIndex = 0; coordinateIndex < cities; ++coordinateIndex) - { - /* 0 <= x <= xMax */ - citiesX[coordinateIndex] = randomInclusive(xMax); - /* 0 <= y <= yMax */ - citiesY[coordinateIndex] = randomInclusive(yMax); - } - - /* Generate random population */ - randomPopulation(); -} - -void TSP::randomPopulation() -{ - /* Iterate throught each 
chromosone... */ - for(size_t chromosoneIndex = 0; chromosoneIndex < chromosones; ++chromosoneIndex) - { - /* ... and give it a random path */ - setRandomPath(solutions[chromosoneIndex]); - } -} - -double TSP::getBestFitness() const -{ - return evaluateFitness(bestChromosone); -} - -double TSP::getAverageDistance() const -{ - double distance = 0; - for(size_t chromosoneIndex = 0; chromosoneIndex < chromosones; ++chromosoneIndex) - { - distance += totalDistance(solutions[chromosoneIndex]); - } - return distance/chromosones; -} - -string TSP::getBestPathString() const -{ - stringstream path; - for(size_t gene = 0; gene < cities; ++gene) - { - if(gene != 0) - { - path << ","; - } - path << bestChromosone[gene]; - } - return path.str(); -} - -double TSP::getLowestTotalDistance() const -{ - return totalDistance(bestChromosone); -} - -void TSP::nextPopulation() -{ - double fitness[chromosones]; - /* Fill an array with a fitness score for each chromosone, - * the index of a score corresponds with the chromosone's index in solutions[index] - */ - for(size_t chromosoneIndex = 0; chromosoneIndex < chromosones; ++chromosoneIndex) - { - fitness[chromosoneIndex] = evaluateFitness(solutions[chromosoneIndex]); - } - - /* Use elitism, find and copy over the two best chromosones to the new population */ - int eliteIndex1 = 0, eliteIndex2 = 0; - /* find the best solution */ - eliteIndex1 = max_element(fitness, fitness + chromosones) - fitness; - this->bestChromosone = solutions[eliteIndex1]; - - double highestFitness = 0; - /* Find the second best solution */ - for(size_t chromosoneIndex = 0; chromosoneIndex < chromosones; ++chromosoneIndex) - { - if(chromosoneIndex != eliteIndex1 && fitness[chromosoneIndex] > highestFitness) - { - highestFitness = fitness[chromosoneIndex]; - eliteIndex2 = chromosoneIndex; - } - } - - /* Keep track of how many chromosones exists in the new population */ - size_t offspringCount = 0; - /* Copy over the two best solutions to the new population */ - 
copyToNewPopulation(solutions[eliteIndex1], offspringCount); - ++offspringCount; - copyToNewPopulation(solutions[eliteIndex2], offspringCount); - ++offspringCount; - - /* Create the rest of the new population, break this loop when the new population is complete */ - while(true) - { - int * parentA; - int * parentB; - parentA = rouletteSelection(fitness); - parentB = rouletteSelection(fitness); - while (parentB == parentA) - { - parentB = rouletteSelection(fitness); - } - int offspringA[cities]; - int offspringB[cities]; - crossover(parentA, parentB, offspringA, offspringB); - mutate(offspringA); - mutate(offspringB); - - /* Add to new population if an equal chromosone doesn't exist already */ - if(!hasDuplicate(offspringA, offspringCount)) - { - copyToNewPopulation(offspringA, offspringCount); - ++offspringCount; - } - /* We need to check if the new population is filled */ - if(offspringCount == chromosones) - { - break; - } - if(!hasDuplicate(offspringB, offspringCount)) - { - copyToNewPopulation(offspringB, offspringCount); - ++offspringCount; - } - /* Check again so that we don't accidentaly write all over the heap and have to spend an evening wondering why the heap is corrupt... 
:) */ - if(offspringCount == chromosones) - { - break; - } - } - - /* - * We now have a new population, - * now it needs to replace the current population - * so that we don't go through the same population every time we run this function - */ - for(size_t chromosoneIndex = 0; chromosoneIndex < chromosones; ++chromosoneIndex) - { - memcpy(solutions[chromosoneIndex], newPopulation[chromosoneIndex], sizeof(int) * cities); - } -} - -bool TSP::hasDuplicate(const int * const chromosone, size_t populationCount) -{ - /* Iterate throught each chromosone in newPopulation and compare them gene by gene */ - for(size_t chromosoneIndex = 0; chromosoneIndex < populationCount; ++chromosoneIndex) - { - int genesCompared = 0; - for(size_t gene = 0; gene < cities; ++gene) - { - if(chromosone[gene] != newPopulation[chromosoneIndex][gene]) - { - /* These chromosones are not equal! */ - break; - } - ++genesCompared; - } - - if(genesCompared == cities) - { - return true; - } - } - - return false; -} - -void TSP::mutate(int * const chromosone) -{ - /* 0.0 <= random <= 1 */ - { - double random = randomInclusive(1); - /* Nope, didn't happen */ - if(random > mutationProbability) - { - return; - } - } - - int tmp; - int random1 = (int)randomExclusive(cities); - int random2 = (int)randomExclusive(cities); - while(random1 == random2) - { - random2 = (int)randomExclusive(cities); - } - - tmp = chromosone[random1]; - chromosone[random1] = chromosone[random2]; - chromosone[random2] = tmp; - -} - -void TSP::crossover(int const * const parentA, const int * const parentB, int * offspringA, int * offspringB) -{ - { - /* There is a chance we don't perform a crossover, - * in that case the offspring is a copy of the parents - */ - /* 0.0 <= random <= 1 */ - double random = randomInclusive(1); - /* The offspring is a copy of their parents */ - if(random > crossoverProbability) - { - memcpy(offspringA, parentA, sizeof(int) * cities); - memcpy(offspringB, parentB, sizeof(int) * cities); - return; - } - } 
- /* Perform multi-point crossover to generate offspring */ - - /* 0 <= cuttOffIndex <= cities */ - int cuttOffIndex1 = (int)randomInclusive(cities); - int cuttOffIndex2 = (int)randomInclusive(cities); - while(cuttOffIndex2 == cuttOffIndex1) - { - cuttOffIndex2 = (int)randomExclusive(cities); - } - - unsigned int start; - unsigned int end; - if(cuttOffIndex1 < cuttOffIndex2) - { - start = cuttOffIndex1; - end = cuttOffIndex2; - } - else - { - start = cuttOffIndex2; - end = cuttOffIndex1; - } - /* Offspring A is initially copy of parent A */ - memcpy(offspringA, parentA, sizeof(int) * cities); - /* Offspring B is initially copy of parent B */ - memcpy(offspringB, parentB, sizeof(int) * cities); - - /* Put a sequence of parent B in offspring A */ - memcpy(offspringA + start, parentB + start, sizeof(int) * (end - start)); - /* Put a sequence of parent A in offspring B */ - memcpy(offspringB + start, parentA + start, sizeof(int) * (end - start)); - - /* Mark collisions in offspring with -1*/ - for(size_t cityIndex = 0; cityIndex < cities; ++cityIndex) - { - /* Index is part of the parent sequence */ - if((cityIndex >= start && cityIndex < end)) { - /* Do nothing, we want to keep this sequence intact */ - } - else - { - /* Check if the item at cityIndex also occurs somewhere in the copied substring */ - for(size_t substringIndex = start; substringIndex < end; ++substringIndex) - { - /* A duplicate, mark it */ - if(offspringA[cityIndex] == offspringA[substringIndex]) - { - offspringA[cityIndex] = -1; - } - if(offspringB[cityIndex] == offspringB[substringIndex]) - { - offspringB[cityIndex] = -1; - } - } - } - - } - - /* - * Go through the offspring, - * if an element is marked we fill the hole with an element from the other offspring - */ - for(size_t offspringIndex = 0; offspringIndex < cities; ++offspringIndex) - { - /* There is a hole here */ - if(offspringA[offspringIndex] == -1) - { - repairOffspring(offspringA, offspringIndex, offspringB); - } - 
if(offspringB[offspringIndex] == -1) - { - repairOffspring(offspringB, offspringIndex, offspringA); - } - } -} - -void TSP::repairOffspring(int * const offspringToRepair, int missingIndex, const int * const other) -{ - /* Iterate through the other offspring until we find an element which doesn't exist in the offspring we are repairing */ - for(size_t patchIndex = 0; patchIndex < cities; ++patchIndex) - { - /* Look for other[patchIndex] in offspringToRepair */ - int *missing = find(offspringToRepair, offspringToRepair + cities, other[patchIndex]); - - /* The element at other[patchIndex] is missing from offspringToRepair */ - if(missing == (offspringToRepair + cities)) - { - //cout << "1:" << offspringToRepair[missingIndex] << endl; - offspringToRepair[missingIndex] = other[patchIndex]; - //cout << "2:" << offspringToRepair[missingIndex] << endl; - break; - } - } -} - -void TSP::copyToNewPopulation(int const * const chromosone, size_t index) -{ - assert(index < chromosones && "Index out of bounds"); - for(size_t i = 0; i < cities; ++i) - { - newPopulation[index][i] = chromosone[i]; - } - -} - -int * TSP::rouletteSelection(double const * const fitness) const -{ - double sum = 0; - /* Calculate sum of all chromosome fitnesses in population */ - for(size_t i = 0; i < chromosones; ++i) - { - sum += fitness[i]; - } - - /* 0.0 <= random <= sum */ - double random = randomInclusive(sum); - - sum = 0; - /* Go through the population and sum fitnesses from 0 to sum s. When the sum s is greater or equal to r; stop and return the chromosome where you are */ - for(size_t i = 0; i < chromosones; ++i) - { - sum += fitness[i]; - if(sum >= random) - { - return solutions[i]; - } - } - assert(false && "A chromosone should have been picked by now"); - return(NULL); -} - -void TSP::setRandomPath(int * chromosone) -{ - for(size_t i = 0; i < cities; ++i) - { - chromosone[i] = i; - } - - /* - * Shuffle the chromosone using the Fisher–Yates shuffle. 
- */ - for(size_t i = cities-1; i > 0; --i) - { - /* 0 <= random <= i */ - int random = (int)randomInclusive(i); - int temp = chromosone[i]; - chromosone[i] = chromosone[random]; - chromosone[random] = temp; - } -} - -double TSP::evaluateFitness(int const * const chromosone) const -{ - return 1/totalDistance(chromosone); -} - -double TSP::totalDistance(int const * const chromosone) const -{ - double distance = 0; - /* Calculate the total distance between all cities */ - for(size_t i = 0; i < cities-1; ++i) - { - double dx = citiesX[chromosone[i]] - citiesX[chromosone[i+1]]; - double dy = citiesY[chromosone[i]] - citiesY[chromosone[i+1]]; - - /* The distance between two points is the square root of (dx^2+dy^2) */ - distance += sqrt((pow(dx, 2.0) + pow(dy, 2.0))); - } - /* We complete the tour by adding the distance between the last and the first city */ - double dx = citiesX[chromosone[cities-1]] - citiesX[chromosone[0]]; - double dy = citiesY[chromosone[cities-1]] - citiesY[chromosone[0]]; - distance += sqrt((pow(dx, 2.0) + pow(dy, 2.0))); - - return distance; -} - -double TSP::randomInclusive(double max) -{ - /* Generate random number r, 0.0 <= r <= max */ - //return ((double)rand() / (double)RAND_MAX * max); - return ((double)rand() * max) / (double)RAND_MAX; -} - -double TSP::randomExclusive(double max) -{ - /* Generate random number r, 0.0 <= r < max */ - //return ((double)rand() / ((double)RAND_MAX + 1) * max); - return ((double)rand() * max) / ((double)RAND_MAX + 1); -} - -int main(int argc, const char *argv[]) -{ - /* 90% mutation probability, 2% mutation probability */ - TSP *tsp = new TSP(0.9, 0.02); - size_t generations = 0, generationsWithoutImprovement = 0; - double bestFitness = -1; - double initialAverage = tsp->getAverageDistance(); - /* We'll stop when we've gone 10k generations without improvement */ - while(generations < 10000) - { - tsp->nextPopulation(); - ++generations; - double newFitness = tsp->getBestFitness(); - /* The new fitness is 
higher, the chromosone is better */ - if(newFitness > bestFitness) - { - bestFitness = newFitness; - generationsWithoutImprovement = 0; - //cout << "Best goal function: " << tsp->getBestFitness() << endl; - } - else - { - ++generationsWithoutImprovement; - } - } - //cout << "DONE!" << endl; - cout << "Number of generations: " << generations << endl; - cout << "Best chromosone info: " << endl; - cout << "\t-Path: " << tsp->getBestPathString() << endl; - cout << "\t-Goal function: " << tsp->getBestFitness() << endl; - cout << "\t-Distance: " << tsp->getLowestTotalDistance() << endl; - cout << "Average distance: " << tsp->getAverageDistance() << endl; - cout << "Initial average: " << initialAverage << endl; - delete tsp; - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/cc_param_defaults.json b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/cc_param_defaults.json deleted file mode 100644 index 067a26573a08ea6956e407833fa9ca17faafa4bf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/cc_param_defaults.json +++ /dev/null @@ -1 +0,0 @@ -{"max-pipeline-region-insns": {"default": 200, "max": 0, "min": 0}, "ipa-cp-loop-hint-bonus": {"default": 64, "max": 0, "min": 0}, "lim-expensive": {"default": 20, "max": 0, "min": 0}, "uninit-control-dep-attempts": {"default": 1000, "max": 0, "min": 1}, "lto-partitions": {"default": 32, "max": 0, "min": 1}, "max-inline-recursive-depth-auto": {"default": 8, "max": 0, "min": 0}, "max-unroll-times": {"default": 8, "max": 0, "min": 0}, "max-tail-merge-comparisons": {"default": 10, "max": 0, "min": 0}, "early-inlining-insns": {"default": 11, "max": 0, "min": 0}, "prefetch-latency": {"default": 200, "max": 0, "min": 0}, "partial-inlining-entry-probability": {"default": 70, "max": 0, "min": 0}, "integer-share-limit": {"default": 251, "max": 2, "min": 2}, "tm-max-aggregate-size": {"default": 9, "max": 0, "min": 0}, 
"ira-max-conflict-table-size": {"default": 1000, "max": 0, "min": 0}, "asan-instrument-reads": {"default": 1, "max": 1, "min": 0}, "lto-min-partition": {"default": 1000, "max": 0, "min": 0}, "hot-bb-frequency-fraction": {"default": 1000, "max": 0, "min": 0}, "min-vect-loop-bound": {"default": 1, "max": 0, "min": 1}, "max-crossjump-edges": {"default": 100, "max": 0, "min": 0}, "sms-dfa-history": {"default": 0, "max": 0, "min": 0}, "tracer-max-code-growth": {"default": 100, "max": 0, "min": 0}, "max-pipeline-region-blocks": {"default": 15, "max": 0, "min": 0}, "gcse-after-reload-partial-fraction": {"default": 3, "max": 0, "min": 0}, "asan-stack": {"default": 1, "max": 1, "min": 0}, "asan-memintrin": {"default": 1, "max": 1, "min": 0}, "large-function-insns": {"default": 2700, "max": 0, "min": 0}, "scev-max-expr-size": {"default": 100, "max": 0, "min": 0}, "iv-consider-all-candidates-bound": {"default": 30, "max": 0, "min": 0}, "max-partial-antic-length": {"default": 100, "max": 0, "min": 0}, "prefetch-min-insn-to-mem-ratio": {"default": 3, "max": 0, "min": 0}, "min-crossjump-insns": {"default": 5, "max": 0, "min": 1}, "asan-use-after-return": {"default": 1, "max": 1, "min": 0}, "allow-load-data-races": {"default": 1, "max": 1, "min": 0}, "max-jump-thread-duplication-stmts": {"default": 15, "max": 0, "min": 0}, "tracer-min-branch-probability": {"default": 50, "max": 100, "min": 0}, "l2-cache-size": {"default": 512, "max": 0, "min": 0}, "max-cse-insns": {"default": 1000, "max": 0, "min": 0}, "sched-pressure-algorithm": {"default": 1, "max": 2, "min": 1}, "max-unrolled-insns": {"default": 200, "max": 0, "min": 0}, "ipa-cp-value-list-size": {"default": 8, "max": 0, "min": 0}, "graphite-max-nb-scop-params": {"default": 10, "max": 0, "min": 0}, "max-completely-peel-times": {"default": 16, "max": 0, "min": 0}, "min-inline-recursive-probability": {"default": 10, "max": 0, "min": 0}, "max-stores-to-sink": {"default": 2, "max": 0, "min": 0}, "sink-frequency-threshold": 
{"default": 75, "max": 100, "min": 0}, "builtin-expect-probability": {"default": 90, "max": 100, "min": 0}, "max-average-unrolled-insns": {"default": 80, "max": 0, "min": 0}, "tracer-min-branch-ratio": {"default": 10, "max": 100, "min": 0}, "inline-unit-growth": {"default": 30, "max": 0, "min": 0}, "max-early-inliner-iterations": {"default": 1, "max": 0, "min": 0}, "hot-bb-count-ws-permille": {"default": 999, "max": 1000, "min": 0}, "max-gcse-memory": {"default": 52428800, "max": 0, "min": 0}, "ggc-min-expand": {"default": 30, "max": 0, "min": 0}, "tree-reassoc-width": {"default": 0, "max": 0, "min": 0}, "max-once-peeled-insns": {"default": 400, "max": 0, "min": 0}, "max-inline-recursive-depth": {"default": 8, "max": 0, "min": 0}, "max-inline-insns-recursive": {"default": 450, "max": 0, "min": 0}, "ira-loop-reserved-regs": {"default": 2, "max": 0, "min": 0}, "align-loop-iterations": {"default": 4, "max": 0, "min": 0}, "gcse-cost-distance-ratio": {"default": 10, "max": 0, "min": 0}, "sched-mem-true-dep-cost": {"default": 1, "max": 0, "min": 0}, "gcse-unrestricted-cost": {"default": 3, "max": 0, "min": 0}, "max-inline-insns-recursive-auto": {"default": 450, "max": 0, "min": 0}, "max-cse-path-length": {"default": 10, "max": 0, "min": 1}, "switch-conversion-max-branch-ratio": {"default": 8, "max": 0, "min": 1}, "max-tracked-strlens": {"default": 1000, "max": 0, "min": 0}, "inline-min-speedup": {"default": 10, "max": 0, "min": 0}, "max-cselib-memory-locations": {"default": 500, "max": 0, "min": 0}, "max-tail-merge-iterations": {"default": 2, "max": 0, "min": 0}, "max-inline-insns-auto": {"default": 40, "max": 0, "min": 0}, "min-insn-to-prefetch-ratio": {"default": 9, "max": 0, "min": 0}, "max-slsr-cand-scan": {"default": 50, "max": 999999, "min": 1}, "min-nondebug-insn-uid": {"default": 0, "max": 0, "min": 1}, "max-sched-region-blocks": {"default": 10, "max": 0, "min": 0}, "vect-max-version-for-alignment-checks": {"default": 6, "max": 0, "min": 0}, "max-vartrack-size": 
{"default": 50000000, "max": 0, "min": 0}, "loop-max-datarefs-for-datadeps": {"default": 1000, "max": 0, "min": 0}, "asan-instrument-writes": {"default": 1, "max": 1, "min": 0}, "asan-globals": {"default": 1, "max": 1, "min": 0}, "large-function-growth": {"default": 100, "max": 0, "min": 0}, "max-last-value-rtl": {"default": 10000, "max": 0, "min": 0}, "selsched-max-sched-times": {"default": 2, "max": 0, "min": 0}, "sms-max-ii-factor": {"default": 100, "max": 0, "min": 0}, "max-hoist-depth": {"default": 30, "max": 0, "min": 0}, "comdat-sharing-probability": {"default": 20, "max": 0, "min": 0}, "allow-store-data-races": {"default": 1, "max": 1, "min": 0}, "omega-max-vars": {"default": 128, "max": 0, "min": 0}, "iv-max-considered-uses": {"default": 250, "max": 0, "min": 0}, "max-inline-insns-single": {"default": 400, "max": 0, "min": 0}, "simultaneous-prefetches": {"default": 3, "max": 0, "min": 0}, "ipa-max-agg-items": {"default": 16, "max": 0, "min": 0}, "max-peel-times": {"default": 16, "max": 0, "min": 0}, "min-size-for-stack-sharing": {"default": 32, "max": 0, "min": 0}, "ira-max-loops-num": {"default": 100, "max": 0, "min": 0}, "tracer-dynamic-coverage": {"default": 75, "max": 100, "min": 0}, "max-gcse-insertion-ratio": {"default": 20, "max": 0, "min": 0}, "tracer-min-branch-probability-feedback": {"default": 80, "max": 100, "min": 0}, "max-sched-insn-conflict-delay": {"default": 3, "max": 10, "min": 1}, "max-peeled-insns": {"default": 100, "max": 0, "min": 0}, "max-dse-active-local-stores": {"default": 5000, "max": 0, "min": 0}, "max-variable-expansions-in-unroller": {"default": 1, "max": 0, "min": 0}, "max-delay-slot-live-search": {"default": 333, "max": 0, "min": 0}, "min-spec-prob": {"default": 40, "max": 0, "min": 0}, "loop-invariant-max-bbs-in-loop": {"default": 10000, "max": 0, "min": 0}, "selsched-insns-to-rename": {"default": 2, "max": 0, "min": 0}, "max-completely-peel-loop-nest-depth": {"default": 8, "max": 0, "min": 0}, 
"allow-packed-store-data-races": {"default": 1, "max": 1, "min": 0}, "omega-eliminate-redundant-constraints": {"default": 0, "max": 1, "min": 0}, "omega-max-geqs": {"default": 256, "max": 0, "min": 0}, "l1-cache-line-size": {"default": 32, "max": 0, "min": 0}, "case-values-threshold": {"default": 0, "max": 0, "min": 0}, "max-pending-list-length": {"default": 32, "max": 0, "min": 0}, "sccvn-max-alias-queries-per-access": {"default": 1000, "max": 0, "min": 0}, "max-vartrack-expr-depth": {"default": 12, "max": 0, "min": 0}, "loop-block-tile-size": {"default": 51, "max": 0, "min": 0}, "sms-loop-average-count-threshold": {"default": 0, "max": 0, "min": 0}, "vect-max-peeling-for-alignment": {"default": -1, "max": 64, "min": -1}, "selsched-max-lookahead": {"default": 50, "max": 0, "min": 0}, "omega-max-keys": {"default": 500, "max": 0, "min": 0}, "sccvn-max-scc-size": {"default": 10000, "max": 0, "min": 10}, "predictable-branch-outcome": {"default": 2, "max": 50, "min": 0}, "ssp-buffer-size": {"default": 8, "max": 0, "min": 1}, "max-delay-slot-insn-search": {"default": 100, "max": 0, "min": 0}, "sms-min-sc": {"default": 2, "max": 1, "min": 1}, "lra-max-considered-reload-pseudos": {"default": 500, "max": 0, "min": 0}, "tracer-dynamic-coverage-feedback": {"default": 95, "max": 100, "min": 0}, "omega-max-eqs": {"default": 128, "max": 0, "min": 0}, "max-fields-for-field-sensitive": {"default": 0, "max": 0, "min": 0}, "max-sched-region-insns": {"default": 100, "max": 0, "min": 0}, "large-stack-frame-growth": {"default": 1000, "max": 0, "min": 0}, "omega-max-wild-cards": {"default": 18, "max": 0, "min": 0}, "max-sched-extend-regions-iters": {"default": 0, "max": 0, "min": 0}, "max-unswitch-insns": {"default": 50, "max": 0, "min": 0}, "ipcp-unit-growth": {"default": 10, "max": 0, "min": 0}, "max-unswitch-level": {"default": 3, "max": 0, "min": 0}, "l1-cache-size": {"default": 64, "max": 0, "min": 0}, "max-grow-copy-bb-insns": {"default": 8, "max": 0, "min": 0}, 
"max-iterations-computation-cost": {"default": 10, "max": 0, "min": 0}, "ipa-cp-array-index-hint-bonus": {"default": 48, "max": 0, "min": 0}, "ggc-min-heapsize": {"default": 4096, "max": 0, "min": 0}, "align-threshold": {"default": 100, "max": 0, "min": 1}, "graphite-max-bbs-per-function": {"default": 100, "max": 0, "min": 0}, "max-vartrack-reverse-op-size": {"default": 50, "max": 0, "min": 0}, "ipa-sra-ptr-growth-factor": {"default": 2, "max": 0, "min": 0}, "max-completely-peeled-insns": {"default": 100, "max": 0, "min": 0}, "ipa-cp-eval-threshold": {"default": 500, "max": 0, "min": 0}, "large-stack-frame": {"default": 256, "max": 0, "min": 0}, "max-modulo-backtrack-attempts": {"default": 40, "max": 0, "min": 0}, "omega-hash-table-size": {"default": 550, "max": 0, "min": 0}, "max-goto-duplication-insns": {"default": 8, "max": 0, "min": 0}, "max-sched-ready-insns": {"default": 100, "max": 0, "min": 0}, "max-iterations-to-track": {"default": 1000, "max": 0, "min": 0}, "scev-max-expr-complexity": {"default": 10, "max": 0, "min": 0}, "cxx-max-namespaces-for-diagnostic-help": {"default": 1000, "max": 0, "min": 0}, "max-reload-search-insns": {"default": 100, "max": 0, "min": 0}, "use-canonical-types": {"default": 1, "max": 1, "min": 0}, "gcse-after-reload-critical-fraction": {"default": 10, "max": 0, "min": 0}, "sched-state-edge-prob-cutoff": {"default": 10, "max": 100, "min": 0}, "sched-spec-prob-cutoff": {"default": 40, "max": 100, "min": 0}, "unlikely-bb-count-fraction": {"default": 20, "max": 10000, "min": 1}, "slp-max-insns-in-bb": {"default": 1000, "max": 0, "min": 0}, "max-peel-branches": {"default": 32, "max": 0, "min": 0}, "large-unit-insns": {"default": 10000, "max": 0, "min": 0}, "iv-always-prune-cand-set-bound": {"default": 10, "max": 0, "min": 0}, "vect-max-version-for-alias-checks": {"default": 10, "max": 0, "min": 0}, "max-predicted-iterations": {"default": 100, "max": 0, "min": 0}, "allow-packed-load-data-races": {"default": 1, "max": 1, "min": 0}} \ No 
newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags.py deleted file mode 100755 index 1edc5bb8a3ce886e968a5f7d7d2e4f362ff8940b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -import adddeps # fix sys.path - -import math -import argparse -import ast -import collections -import json -import logging -import opentuner -import os -import random -import re -import shutil -import subprocess -import sys - -from opentuner.resultsdb.models import Result, TuningRun -from opentuner.search import manipulator - -FLAGS_WORKING_CACHE_FILE = 'cc_flags.json' -PARAMS_DEFAULTS_CACHE_FILE = 'cc_param_defaults.json' -PARAMS_DEF_PATH = '~/gcc-4.9.0/gcc/params.def' -PARAMS_WORKING_CACHE_FILE = 'cc_params.json' - -log = logging.getLogger('gccflags') - -argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) -argparser.add_argument('source', help='source file to compile') -argparser.add_argument('--compile-template', - default='{cc} {source} -o {output} -lpthread {flags}', - help='command to compile {source} into {output} with' - ' {flags}') -argparser.add_argument('--compile-limit', type=float, default=30, - help='kill gcc if it runs more than {default} sec') -argparser.add_argument('--scaler', type=int, default=4, - help='by what factor to try increasing parameters') -argparser.add_argument('--cc', default='g++', help='g++ or gcc') -argparser.add_argument('--output', default='./tmp.bin', - help='temporary file for compiler to write to') -argparser.add_argument('--debug', action='store_true', - help='on gcc errors try to find minimal set ' - 'of args to reproduce error') -argparser.add_argument('--force-killall', action='store_true', - help='killall cc1plus before each collection') 
-argparser.add_argument('--memory-limit', default=1024 ** 3, type=int, - help='memory limit for child process') -argparser.add_argument('--no-cached-flags', action='store_true', - help='regenerate the lists of legal flags each time') -argparser.add_argument('--flags-histogram', action='store_true', - help='print out a histogram of flags') -argparser.add_argument('--flag-importance', - help='Test the importance of different flags from a ' - 'given json file.') - - -class GccFlagsTuner(opentuner.measurement.MeasurementInterface): - def __init__(self, *pargs, **kwargs): - super(GccFlagsTuner, self).__init__(program_name=args.source, *pargs, - **kwargs) - self.gcc_version = self.extract_gcc_version() - self.cc_flags = self.extract_working_flags() - self.cc_param_defaults = self.extract_param_defaults() - self.cc_params = self.extract_working_params() - - # these bugs are hardcoded for now - # sets of options which causes gcc to barf - if True: - # These bugs were for gcc 4.7 on ubuntu - self.cc_bugs = (['-fipa-matrix-reorg', '-fwhole-program'], - ['-fno-tree-coalesce-inlined-vars'], - ['-fno-inline-atomics'], - ['-ftoplevel-reorder', '-fno-unit-at-a-time']) - else: - # Bugs for gcc 4.9 (work in progress, incomplete list) - self.cc_bugs = (['-ftoplevel-reorder', '-fno-unit-at-a-time'], ) - - self.result_list = {} - self.parallel_compile = True - try: - os.stat('./tmp') - except OSError: - os.mkdir('./tmp') - self.run_baselines() - - def run_baselines(self): - log.info("baseline perfs -O0=%.4f -O1=%.4f -O2=%.4f -O3=%.4f", - *[self.run_with_flags(['-O%d' % i], None).time - for i in range(4)]) - - def extract_gcc_version(self): - m = re.search(r'([0-9]+)[.]([0-9]+)[.]([0-9]+)', subprocess.check_output([ - self.args.cc, '--version'])) - if m: - gcc_version = tuple(map(int, m.group(1, 2, 3))) - else: - gcc_version = None - log.debug('gcc version %s', gcc_version) - return gcc_version - - def extract_working_flags(self): - """ - Figure out which gcc flags work (don't cause 
gcc to barf) by running - each one. - """ - if os.path.isfile(FLAGS_WORKING_CACHE_FILE) and not args.no_cached_flags: - # use cached version - found_cc_flags = json.load(open(FLAGS_WORKING_CACHE_FILE)) - else: - # extract flags from --help=optimizers - optimizers, err = subprocess.Popen([self.args.cc, '--help=optimizers'], - stdout=subprocess.PIPE).communicate() - found_cc_flags = re.findall(r'^ (-f[a-z0-9-]+) ', optimizers, - re.MULTILINE) - log.info('Determining which of %s possible gcc flags work', - len(found_cc_flags)) - found_cc_flags = filter(self.check_if_flag_works, found_cc_flags) - json.dump(found_cc_flags, open(FLAGS_WORKING_CACHE_FILE, 'w')) - return found_cc_flags - - def extract_param_defaults(self): - """ - Get the default, minimum, and maximum for each gcc parameter. - Requires source code for gcc to be in your home directory. - This example ships with a cached version so it does not require source. - """ - if os.path.isfile(PARAMS_DEFAULTS_CACHE_FILE) and not args.no_cached_flags: - # use cached version - param_defaults = json.load(open(PARAMS_DEFAULTS_CACHE_FILE)) - else: - # default values of params need to be extracted from source code, - # since they are not in --help - param_defaults = dict() - params_def = open(os.path.expanduser(PARAMS_DEF_PATH)).read() - for m in re.finditer(r'DEFPARAM *\((([^")]|"[^"]*")*)\)', params_def): - param_def_str = (m.group(1) - # Hacks!!! 
- .replace('GGC_MIN_EXPAND_DEFAULT', '30') - .replace('GGC_MIN_HEAPSIZE_DEFAULT', '4096') - .replace('50 * 1024 * 1024', '52428800')) - try: - name, desc, default, param_min, param_max = ast.literal_eval( - '[' + param_def_str.split(',', 1)[1] + ']') - param_defaults[name] = {'default': default, - 'min': param_min, - 'max': param_max} - except: - log.exception("error with %s", param_def_str) - json.dump(param_defaults, open(PARAMS_DEFAULTS_CACHE_FILE, 'w')) - return param_defaults - - def extract_working_params(self): - """ - Figure out which gcc params work (don't cause gcc to barf) by running - each one to test. - """ - params, err = subprocess.Popen( - [self.args.cc, '--help=params'], stdout=subprocess.PIPE).communicate() - all_params = re.findall(r'^ ([a-z0-9-]+) ', params, re.MULTILINE) - all_params = sorted(set(all_params) & - set(self.cc_param_defaults.keys())) - if os.path.isfile(PARAMS_WORKING_CACHE_FILE) and not args.no_cached_flags: - # use cached version - return json.load(open(PARAMS_WORKING_CACHE_FILE)) - else: - log.info('Determining which of %s possible gcc params work', - len(all_params)) - working_params = [] - for param in all_params: - if self.check_if_flag_works('--param={}={}'.format( - param, self.cc_param_defaults[param]['default'])): - working_params.append(param) - json.dump(working_params, open(PARAMS_WORKING_CACHE_FILE, 'w')) - return working_params - - def check_if_flag_works(self, flag, try_inverted=True): - cmd = args.compile_template.format(source=args.source, output=args.output, - flags=flag, cc=args.cc) - compile_result = self.call_program(cmd, limit=args.compile_limit) - if compile_result['returncode'] != 0: - log.warning("removing flag %s because it results in compile error", flag) - return False - if 'warning: this target' in compile_result['stderr']: - log.warning("removing flag %s because not supported by target", flag) - return False - if 'has been renamed' in compile_result['stderr']: - log.warning("removing flag %s because 
renamed", flag) - return False - if try_inverted and flag[:2] == '-f': - if not self.check_if_flag_works(invert_gcc_flag(flag), - try_inverted=False): - log.warning("Odd... %s works but %s does not", flag, - invert_gcc_flag(flag)) - return False - return True - - def manipulator(self): - m = manipulator.ConfigurationManipulator() - m.add_parameter(manipulator.IntegerParameter('-O', 0, 3)) - for flag in self.cc_flags: - m.add_parameter(manipulator.EnumParameter(flag, ['on', 'off', 'default'])) - for param in self.cc_params: - defaults = self.cc_param_defaults[param] - if defaults['max'] <= defaults['min']: - defaults['max'] = float('inf') - defaults['max'] = min(defaults['max'], - max(1, defaults['default']) * args.scaler) - defaults['min'] = max(defaults['min'], - max(1, defaults['default']) / args.scaler) - - if param == 'l1-cache-line-size': - # gcc requires this to be a power of two or it internal errors - m.add_parameter(manipulator.PowerOfTwoParameter(param, 4, 256)) - elif defaults['max'] > 128: - m.add_parameter(manipulator.LogIntegerParameter( - param, defaults['min'], defaults['max'])) - else: - m.add_parameter(manipulator.IntegerParameter( - param, defaults['min'], defaults['max'])) - - return m - - def cfg_to_flags(self, cfg): - flags = ['-O%d' % cfg['-O']] - for flag in self.cc_flags: - if cfg[flag] == 'on': - flags.append(flag) - elif cfg[flag] == 'off': - flags.append(invert_gcc_flag(flag)) - - for param in self.cc_params: - flags.append('--param=%s=%d' % (param, cfg[param])) - - # workaround sets of flags that trigger compiler crashes/hangs - for bugset in self.cc_bugs: - if len(set(bugset) & set(flags)) == len(bugset): - flags.remove(bugset[-1]) - return flags - - def make_command(self, cfg): - return args.compile_template.format(source=args.source, output=args.output, - flags=' '.join(self.cfg_to_flags(cfg)), - cc=args.cc) - - def get_tmpdir(self, result_id): - return './tmp/%d' % result_id - - def cleanup(self, result_id): - tmp_dir = 
self.get_tmpdir(result_id) - shutil.rmtree(tmp_dir) - - def compile_and_run(self, desired_result, input, limit): - cfg = desired_result.configuration.data - compile_result = self.compile(cfg, 0) - return self.run_precompiled(desired_result, input, limit, compile_result, 0) - - compile_results = {'ok': 0, 'timeout': 1, 'error': 2} - - def run_precompiled(self, desired_result, input, limit, compile_result, - result_id): - if self.args.force_killall: - os.system('killall -9 cc1plus 2>/dev/null') - # Make sure compile was successful - if compile_result == self.compile_results['timeout']: - return Result(state='TIMEOUT', time=float('inf')) - elif compile_result == self.compile_results['error']: - return Result(state='ERROR', time=float('inf')) - - tmp_dir = self.get_tmpdir(result_id) - output_dir = '%s/%s' % (tmp_dir, args.output) - try: - run_result = self.call_program([output_dir], limit=limit, - memory_limit=args.memory_limit) - except OSError: - return Result(state='ERROR', time=float('inf')) - - if run_result['returncode'] != 0: - if run_result['timeout']: - return Result(state='TIMEOUT', time=float('inf')) - else: - log.error('program error') - return Result(state='ERROR', time=float('inf')) - - return Result(time=run_result['time']) - - def debug_gcc_error(self, flags): - def fails(subflags): - cmd = args.compile_template.format(source=args.source, output=args.output, - flags=' '.join(subflags), - cc=args.cc) - compile_result = self.call_program(cmd, limit=args.compile_limit) - return compile_result['returncode'] != 0 - - if self.args.debug: - while len(flags) > 8: - log.error("compile error with %d flags, diagnosing...", len(flags)) - tmpflags = filter(lambda x: random.choice((True, False)), flags) - if fails(tmpflags): - flags = tmpflags - - # linear scan - minimal_flags = [] - for i in xrange(len(flags)): - tmpflags = minimal_flags + flags[i + 1:] - if not fails(tmpflags): - minimal_flags.append(flags[i]) - log.error("compiler crashes/hangs with flags: %s", 
minimal_flags) - - def compile(self, config_data, result_id): - flags = self.cfg_to_flags(config_data) - return self.compile_with_flags(flags, result_id) - - def compile_with_flags(self, flags, result_id): - tmp_dir = self.get_tmpdir(result_id) - try: - os.stat(tmp_dir) - except OSError: - os.mkdir(tmp_dir) - output_dir = '%s/%s' % (tmp_dir, args.output) - cmd = args.compile_template.format(source=args.source, output=output_dir, - flags=' '.join(flags), - cc=args.cc) - - compile_result = self.call_program(cmd, limit=args.compile_limit, - memory_limit=args.memory_limit) - if compile_result['returncode'] != 0: - if compile_result['timeout']: - log.warning("gcc timeout") - return self.compile_results['timeout'] - else: - log.warning("gcc error %s", compile_result['stderr']) - self.debug_gcc_error(flags) - return self.compile_results['error'] - return self.compile_results['ok'] - - def run_with_flags(self, flags, limit): - return self.run_precompiled(None, None, limit, - self.compile_with_flags(flags, 0), 0) - - def save_final_config(self, configuration): - """called at the end of tuning""" - print "Best flags written to gccflags_final_config.{json,cmd}" - self.manipulator().save_to_file(configuration.data, - 'gccflags_final_config.json') - with open('gccflags_final_config.cmd', 'w') as fd: - fd.write(self.make_command(configuration.data)) - - def flags_histogram(self, session): - counter = collections.Counter() - q = session.query(TuningRun).filter_by(state='COMPLETE') - total = q.count() - for tr in q: - print tr.program.name - for flag in self.cfg_to_flags(tr.final_config.data): - counter[flag] += 1.0 / total - print counter.most_common(20) - - def flag_importance(self): - """ - Test the importance of each flag by measuring the performance with that - flag removed. 
Print out a table for paper - """ - with open(self.args.flag_importance) as fd: - best_cfg = json.load(fd) - flags = self.cfg_to_flags(best_cfg) - counter = collections.Counter() - baseline_time = self.flags_mean_time(flags) - for flag in flags[1:]: - delta_flags = [f for f in flags if f != flag] - flag_time = self.flags_mean_time(delta_flags) - impact = max(0.0, flag_time - baseline_time) - if math.isinf(impact): - impact = 0.0 - counter[flag] = impact - print flag, '{:.4f}'.format(impact) - total_impact = sum(counter.values()) - remaining_impact = total_impact - print r'\bf Flag & \bf Importance \\\hline' - for flag, impact in counter.most_common(20): - print r'{} & {:.1f}\% \\\hline'.format(flag, 100.0 * impact / total_impact) - remaining_impact -= impact - print r'{} other flags & {:.1f}% \\\hline'.format( - len(flags) - 20, 100.0 * remaining_impact / total_impact) - - def flags_mean_time(self, flags, trials=10): - precompiled = self.compile_with_flags(flags, 0) - total = 0.0 - for _ in xrange(trials): - total += self.run_precompiled(None, None, None, precompiled, 0).time - return total / trials - - def prefix_hook(self, session): - if self.args.flags_histogram: - self.flags_histogram(session) - sys.exit(0) - if self.args.flag_importance: - self.flag_importance() - sys.exit(0) - - - -def invert_gcc_flag(flag): - assert flag[:2] == '-f' - if flag[2:5] != 'no-': - return '-fno-' + flag[2:] - return '-f' + flag[5:] - - -if __name__ == '__main__': - opentuner.init_logging() - args = argparser.parse_args() - GccFlagsTuner.main(args) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags_minimal.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags_minimal.py deleted file mode 100755 index 0363b984a8c67064102e9025ce57d388c2585514..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/gccflags/gccflags_minimal.py +++ /dev/null @@ -1,92 +0,0 @@ 
-#!/usr/bin/env python -# -# Autotune flags to g++ to optimize the performance of apps/raytracer.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import EnumParameter -from opentuner import IntegerParameter -from opentuner import MeasurementInterface -from opentuner import Result - -GCC_FLAGS = [ - 'align-functions', 'align-jumps', 'align-labels', - 'align-loops', 'asynchronous-unwind-tables', - 'branch-count-reg', 'branch-probabilities', - # ... (176 total) -] - -# (name, min, max) -GCC_PARAMS = [ - ('early-inlining-insns', 0, 1000), - ('gcse-cost-distance-ratio', 0, 100), - ('iv-max-considered-uses', 0, 1000), - # ... (145 total) -] - - -class GccFlagsTuner(MeasurementInterface): - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - manipulator.add_parameter( - IntegerParameter('opt_level', 0, 3)) - for flag in GCC_FLAGS: - manipulator.add_parameter( - EnumParameter(flag, - ['on', 'off', 'default'])) - for param, min, max in GCC_PARAMS: - manipulator.add_parameter( - IntegerParameter(param, min, max)) - return manipulator - - def compile(self, cfg, id): - """ - Compile a given configuration in parallel - """ - gcc_cmd = 'g++ apps/raytracer.cpp -o ./tmp{0}.bin'.format(id) - gcc_cmd += ' -O{0}'.format(cfg['opt_level']) - for flag in GCC_FLAGS: - if cfg[flag] == 'on': - gcc_cmd += ' -f{0}'.format(flag) - elif cfg[flag] == 'off': - gcc_cmd += ' -fno-{0}'.format(flag) - for param, min, max in GCC_PARAMS: - gcc_cmd += ' --param {0}={1}'.format( - param, cfg[param]) - return self.call_program(gcc_cmd) - - def run_precompiled(self, desired_result, input, limit, compile_result, id): - """ - Run a compile_result from compile() sequentially and return performance - """ - assert compile_result['returncode'] == 0 - - try: - run_result = 
self.call_program('./tmp{0}.bin'.format(id)) - assert run_result['returncode'] == 0 - finally: - self.call_program('rm ./tmp{0}.bin'.format(id)) - - return Result(time=run_result['time']) - - def compile_and_run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - compile_result = self.compile(cfg, 0) - return self.run_precompiled(desired_result, input, limit, compile_result, 0) - -if __name__ == '__main__': - argparser = opentuner.default_argparser() - GccFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/.gitignore deleted file mode 100644 index ebdc2a395f4c8b509233d88992512c4cf4ae3364..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -dump-call-graph -*.callgraph diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.cpp deleted file mode 100644 index 6f1c97ffb85967223bf4e2ebc16d8ae0c2bcd02b..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include "Halide.h" -#include <stdio.h> - -using namespace Halide; - -int main(int argc, char **argv) { - // if (argc < 2) { - // printf("Usage: bilateral_grid <s_sigma>\n"); - // // printf("Spatial sigma is a compile-time parameter, please provide it as an argument.\n" - // // "(llvm's ptx backend doesn't handle integer mods by non-consts yet)\n"); - // return 0; - // } - - ImageParam input(Float(32), 2); - float r_sigma = 0.1; - // int s_sigma = atoi(argv[1]); - int s_sigma = 4; - Var x("x"), y("y"), z("z"), c("c"); - - // Add a boundary condition - Func clamped("clamped"); - clamped(x, y) = input(clamp(x, 0, input.width()-1), - clamp(y, 0, input.height()-1)); - - // Construct the bilateral grid - RDom r(0, s_sigma, 0, s_sigma); - Expr val = clamped(x * s_sigma + r.x - s_sigma/2, y * s_sigma + r.y - s_sigma/2); - val = clamp(val, 0.0f, 1.0f); - Expr zi = cast<int>(val * (1.0f/r_sigma) + 0.5f); - Func grid("grid"), histogram("histogram"); - histogram(x, y, zi, c) += select(c == 0, val, 1.0f); - - // Introduce a dummy function, so we can schedule the histogram within it - grid(x, y, z, c) = histogram(x, y, z, c); - - // Blur the grid using a five-tap filter - Func blurx("blurx"), blury("blury"), blurz("blurz"); - blurx(x, y, z, _) = grid(x-2, y, z, _) + grid(x-1, y, z, _)*4 + grid(x, y, z, _)*6 + grid(x+1, y, z, _)*4 + grid(x+2, y, z, _); - blury(x, y, z, _) = blurx(x, y-2, z, _) + blurx(x, y-1, z, _)*4 + blurx(x, y, z, _)*6 + blurx(x, y+1, z, _)*4 + blurx(x, y+2, z, _); - blurz(x, y, z, _) = blury(x, y, z-2, _) + blury(x, y, z-1, _)*4 + blury(x, y, z, _)*6 + blury(x, y, z+1, _)*4 + blury(x, y, z+2, _); - - // Take trilinear samples to compute the output - val = clamp(clamped(x, y), 0.0f, 1.0f); - Expr zv = val * (1.0f/r_sigma); - zi = cast<int>(zv); - Expr zf = zv - zi; - Expr xf = cast<float>(x % s_sigma) / s_sigma; - Expr yf = cast<float>(y 
% s_sigma) / s_sigma; - Expr xi = x/s_sigma; - Expr yi = y/s_sigma; - Func interpolated("interpolated"); - interpolated(x, y, _) = - lerp(lerp(lerp(blurz(xi, yi, zi, _), blurz(xi+1, yi, zi, _), xf), - lerp(blurz(xi, yi+1, zi, _), blurz(xi+1, yi+1, zi, _), xf), yf), - lerp(lerp(blurz(xi, yi, zi+1, _), blurz(xi+1, yi, zi+1, _), xf), - lerp(blurz(xi, yi+1, zi+1, _), blurz(xi+1, yi+1, zi+1, _), xf), yf), zf); - - // Normalize - Func bilateral_grid("bilateral_grid"); - bilateral_grid(x, y) = interpolated(x, y, 0)/interpolated(x, y, 1); - - AUTOTUNE_HOOK(bilateral_grid); - - char *target = getenv("HL_TARGET"); - if (target && std::string(target) == "ptx") { - - // GPU schedule - grid.compute_root().reorder(z, c, x, y).cuda_tile(x, y, 8, 8); - - // Compute the histogram into shared memory before spilling it to global memory - histogram.store_at(grid, Var("blockidx")).compute_at(grid, Var("threadidx")); - - blurx.compute_root().cuda_tile(x, y, z, 16, 16, 1); - blury.compute_root().cuda_tile(x, y, z, 16, 16, 1); - blurz.compute_root().cuda_tile(x, y, z, 8, 8, 4); - bilateral_grid.compute_root().cuda_tile(x, y, s_sigma, s_sigma); - } else { - - // CPU schedule - grid.compute_root().reorder(c, z, x, y).parallel(y); - histogram.compute_at(grid, x).unroll(c); - blurx.compute_root().parallel(z).vectorize(x, 4); - blury.compute_root().parallel(z).vectorize(x, 4); - blurz.compute_root().parallel(z).vectorize(x, 4); - bilateral_grid.compute_root().parallel(y).vectorize(x, 4); - } - - BASELINE_HOOK(bilateral_grid); - - //bilateral_grid.compile_to_file("bilateral_grid", r_sigma, input); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.settings b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.settings deleted file mode 100644 index 7b829b779a9f7d05ef7b677ea307430728e29f16..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/bilateral_grid.settings +++ /dev/null @@ -1,10 +0,0 @@ -{"input_size": "2048, 2048", - "functions": [ - {"name": "clamped", "vars": ["x", "y"], "calls": []}, - {"name": "histogram", "vars": ["x", "y", "c"], "calls": ["clamped"]}, - {"name": "grid", "vars": ["x", "y", "z", "c"], "calls": ["histogram"]}, - {"name": "blurx", "vars": ["x", "y", "z"], "calls": ["grid"]}, - {"name": "blury", "vars": ["x", "y", "z"], "calls": ["blurx"]}, - {"name": "blurz", "vars": ["x", "y", "z"], "calls": ["blury"]}, - {"name": "interpolated", "vars": ["x", "y"], "calls": ["blurz", "clamped"]}, - {"name": "bilateral_grid", "vars": ["x", "y"], "calls": ["interpolated"]}]} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.cpp deleted file mode 100644 index 7a38dd45fd8bf48ecc1d7489efe991dd320c0b63..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include <Halide.h> -using namespace Halide; - -#define AUTOTUNE_HOOK(x) -#define BASELINE_HOOK(x) - -int main(int argc, char **argv) { - - ImageParam in_img(UInt(16), 2); - Func blur_x("blur_x"), blur_y("blur_y"); - Var x("x"), y("y"), xi("xi"), yi("yi"); - - Func input; - input(x,y) = in_img(clamp(x, 1, in_img.width()-1), - clamp(y, 1, in_img.height())-1); - - // The algorithm - blur_x(x, y) = (input(x, y) + input(x+1, y) + input(x+2, y))/3; - blur_y(x, y) = (blur_x(x, y) + blur_x(x, y+1) + blur_x(x, y+2))/3; - - AUTOTUNE_HOOK(blur_y); - - // How to schedule it - blur_y.split(y, y, yi, 8).parallel(y).vectorize(x, 8); - blur_x.store_at(blur_y, y).compute_at(blur_y, yi).vectorize(x, 8); - - BASELINE_HOOK(blur_y); - - blur_y.compile_to_file("halide_blur", in_img); - - return 0; -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.settings b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.settings deleted file mode 100644 index af0deeac34966616ff0f0af7a008c0c6f74ef2cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/halide_blur.settings +++ /dev/null @@ -1,4 +0,0 @@ -{"input_size": "4096, 4096", - "functions": [ - {"name": "blur_x", "vars": ["x", "y"], "calls": []}, - {"name": "blur_y", "vars": ["x", "y"], "calls": ["blur_x"]}]} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.cpp deleted file mode 100644 index 74d141721db7e22667505f35d1c894d081ae064d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.cpp +++ /dev/null @@ -1,208 +0,0 @@ -#include "Halide.h" - -#define AUTOTUNE_HOOK(x) -#define BASELINE_HOOK(x) - -using namespace Halide; - -#include <iostream> -#include <limits> - -#include <sys/time.h> - -using std::vector; - -double now() { - struct timeval tv; - gettimeofday(&tv, NULL); - static bool first_call = true; - static time_t first_sec = 0; - if (first_call) { - first_call = false; - first_sec = tv.tv_sec; - } - assert(tv.tv_sec >= first_sec); - return (tv.tv_sec - first_sec) + (tv.tv_usec / 1000000.0); -} - -int main(int argc, char **argv) { - ImageParam input(Float(32), 3, "input"); - - const unsigned int levels = 3; - - Func downsampled[levels]; - Func downx[levels]; - Func interpolated[levels]; - Func upsampled[levels]; - Func upsampledx[levels]; - Var x("x"), y("y"), c("c"); - - downsampled[0] = Func("downsampled"); - downx[0] = Func("downx"); - interpolated[0] = Func("interpolated"); - upsampled[0] = Func("upsampled"); - upsampledx[0] = Func("upsampledx"); - - 
Func clamped("clamped"); - clamped(x, y, c) = input(clamp(x, 0, input.width()-1), clamp(y, 0, input.height()-1), c); - - // This triggers a bug in llvm 3.3 (3.2 and trunk are fine), so we - // rewrite it in a way that doesn't trigger the bug. The rewritten - // form assumes the input alpha is zero or one. - // downsampled[0](x, y, c) = select(c < 3, clamped(x, y, c) * clamped(x, y, 3), clamped(x, y, 3)); - downsampled[0](x, y, c) = clamped(x, y, c) * clamped(x, y, 3); - - for (unsigned int l = 1; l < levels; ++l) { - downx[l] = Func("downx"); - downsampled[l] = Func("downsampled"); - downx[l](x, y, c) = (downsampled[l-1](x*2-1, y, c) + - 2.0f * downsampled[l-1](x*2, y, c) + - downsampled[l-1](x*2+1, y, c)) * 0.25f; - downsampled[l](x, y, c) = (downx[l](x, y*2-1, c) + - 2.0f * downx[l](x, y*2, c) + - downx[l](x, y*2+1, c)) * 0.25f; - } - interpolated[levels-1] = Func("interpolated"); - interpolated[levels-1](x, y, c) = downsampled[levels-1](x, y, c); - for (unsigned int l = levels-2; l < levels; --l) { - upsampledx[l] = Func("upsampledx"); - upsampled[l] = Func("upsampled"); - interpolated[l] = Func("interpolated"); - upsampledx[l](x, y, c) = select((x % 2) == 0, - interpolated[l+1](x/2, y, c), - 0.5f * (interpolated[l+1](x/2, y, c) + - interpolated[l+1](x/2+1, y, c))); - upsampled[l](x, y, c) = select((y % 2) == 0, - upsampledx[l](x, y/2, c), - 0.5f * (upsampledx[l](x, y/2, c) + - upsampledx[l](x, y/2+1, c))); - interpolated[l](x, y, c) = downsampled[l](x, y, c) + (1.0f - downsampled[l](x, y, 3)) * upsampled[l](x, y, c); - } - - Func normalize("normalize"); - normalize(x, y, c) = interpolated[0](x, y, c) / interpolated[0](x, y, 3); - - Func final("final"); - final(x, y, c) = normalize(x, y, c); - - AUTOTUNE_HOOK(final); - - int sched; - char *target = getenv("HL_TARGET"); - if (target && std::string(target) == "ptx") { - sched = 4; - } else { - sched = 2; - } - - switch (sched) { - case 0: - { - //std::cout << "Flat schedule." 
<< std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - final.compute_root(); - break; - } - case 1: - { - //std::cout << "Flat schedule with vectorization." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } - final.compute_root(); - break; - } - case 2: - { - Var xi, yi; - //std::cout << "Flat schedule with parallelization + vectorization." << std::endl; - clamped.compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - for (unsigned int l = 1; l < levels-1; ++l) { - if (l > 0) downsampled[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].unroll(x, 2).unroll(y, 2); - } - final.reorder(c, x, y).bound(c, 0, 3).parallel(y); - final.tile(x, y, xi, yi, 2, 2).unroll(xi).unroll(yi); - break; - } - case 3: - { - //std::cout << "Flat schedule with vectorization sometimes." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - if (l + 4 < levels) { - Var yo,yi; - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } else { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - } - final.compute_root(); - break; - } - case 4: - { - //std::cout << "GPU schedule." << std::endl; - - // Some gpus don't have enough memory to process the entire - // image, so we process the image in tiles. 
- Var yo, yi, xo, xi; - final.reorder(c, x, y).bound(c, 0, 3).vectorize(x, 4); - final.tile(x, y, xo, yo, xi, yi, input.width()/4, input.height()/4); - normalize.compute_at(final, xo).reorder(c, x, y).cuda_tile(x, y, 16, 16).unroll(c); - - // Start from level 1 to save memory - level zero will be computed on demand - for (unsigned int l = 1; l < levels; ++l) { - int tile_size = 32 >> l; - if (tile_size < 1) tile_size = 1; - if (tile_size > 16) tile_size = 16; - downsampled[l].compute_root().cuda_tile(x, y, c, tile_size, tile_size, 4); - interpolated[l].compute_at(final, xo).cuda_tile(x, y, c, tile_size, tile_size, 4); - } - - break; - } - default: - assert(0 && "No schedule with this number."); - } - - BASELINE_HOOK(final); - -#if 0 - // JIT compile the pipeline eagerly, so we don't interfere with timing - final.compile_jit(); - - // Image<float> in_png = load<float>(argv[1]); - Image<float> out(2048, 2048, 3); - // assert(in_png.channels() == 4); - // input.set(in_png); - final.infer_input_bounds(out); - - std::cout << "Running... " << std::endl; - double min = std::numeric_limits<double>::infinity(); - const unsigned int iters = 20; - - for (unsigned int x = 0; x < iters; ++x) { - double before = now(); - final.realize(out); - double after = now(); - double amt = after - before; - - std::cout << " " << amt * 1000 << std::endl; - if (amt < min) min = amt; - - } - std::cout << " took " << min * 1000 << " msec." 
<< std::endl; - - // vector<Argument> args; - // args.push_back(input); - // final.compile_to_assembly("test.s", args); - // save(out, argv[2]); -#endif -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.settings b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.settings deleted file mode 100644 index cffd184ed6bc406bb88fb52d1e67ac2349df9532..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simple.settings +++ /dev/null @@ -1,185 +0,0 @@ -{ - "functions": [ - { - "calls": [], - "name": "clamped", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "clamped" - ], - "name": "downsampled", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$2" - ], - "name": "downsampled$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$3" - ], - "name": "downsampled$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled" - ], - "name": "downx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$2" - ], - "name": "downx$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$3" - ], - "name": "interpolated$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$2", - "upsampled$2" - ], - "name": "interpolated$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled", - "upsampled$3" - ], - "name": "interpolated$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$4" - ], - "name": "normalize", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$2" - ], - "name": "upsampled$2", - "update_calls": [], - 
"vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$3" - ], - "name": "upsampled$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$2" - ], - "name": "upsampledx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$3" - ], - "name": "upsampledx$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "normalize" - ], - "name": "final", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - } - ], - "input_size": "2048, 2048, 3" -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.cpp deleted file mode 100644 index cf570558360236d16b21379901e40b3ee8481fd4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.cpp +++ /dev/null @@ -1,208 +0,0 @@ -#include "Halide.h" - -#define AUTOTUNE_HOOK(x) -#define BASELINE_HOOK(x) - -using namespace Halide; - -#include <iostream> -#include <limits> - -#include <sys/time.h> - -using std::vector; - -double now() { - struct timeval tv; - gettimeofday(&tv, NULL); - static bool first_call = true; - static time_t first_sec = 0; - if (first_call) { - first_call = false; - first_sec = tv.tv_sec; - } - assert(tv.tv_sec >= first_sec); - return (tv.tv_sec - first_sec) + (tv.tv_usec / 1000000.0); -} - -int main(int argc, char **argv) { - ImageParam input(Float(32), 3, "input"); - - const unsigned int levels = 2; - - Func downsampled[levels]; - Func downx[levels]; - Func interpolated[levels]; - Func upsampled[levels]; - Func upsampledx[levels]; - Var x("x"), y("y"), c("c"); - - downsampled[0] = Func("downsampled"); - downx[0] = Func("downx"); - interpolated[0] = Func("interpolated"); - upsampled[0] = Func("upsampled"); - upsampledx[0] = Func("upsampledx"); - - Func 
clamped("clamped"); - clamped(x, y, c) = input(clamp(x, 0, input.width()-1), clamp(y, 0, input.height()-1), c); - - // This triggers a bug in llvm 3.3 (3.2 and trunk are fine), so we - // rewrite it in a way that doesn't trigger the bug. The rewritten - // form assumes the input alpha is zero or one. - // downsampled[0](x, y, c) = select(c < 3, clamped(x, y, c) * clamped(x, y, 3), clamped(x, y, 3)); - downsampled[0](x, y, c) = clamped(x, y, c) * clamped(x, y, 3); - - for (unsigned int l = 1; l < levels; ++l) { - downx[l] = Func("downx"); - downsampled[l] = Func("downsampled"); - downx[l](x, y, c) = (downsampled[l-1](x*2-1, y, c) + - 2.0f * downsampled[l-1](x*2, y, c) + - downsampled[l-1](x*2+1, y, c)) * 0.25f; - downsampled[l](x, y, c) = (downx[l](x, y*2-1, c) + - 2.0f * downx[l](x, y*2, c) + - downx[l](x, y*2+1, c)) * 0.25f; - } - interpolated[levels-1] = Func("interpolated"); - interpolated[levels-1](x, y, c) = downsampled[levels-1](x, y, c); - for (unsigned int l = levels-2; l < levels; --l) { - upsampledx[l] = Func("upsampledx"); - upsampled[l] = Func("upsampled"); - interpolated[l] = Func("interpolated"); - upsampledx[l](x, y, c) = select((x % 2) == 0, - interpolated[l+1](x/2, y, c), - 0.5f * (interpolated[l+1](x/2, y, c) + - interpolated[l+1](x/2+1, y, c))); - upsampled[l](x, y, c) = select((y % 2) == 0, - upsampledx[l](x, y/2, c), - 0.5f * (upsampledx[l](x, y/2, c) + - upsampledx[l](x, y/2+1, c))); - interpolated[l](x, y, c) = downsampled[l](x, y, c) + (1.0f - downsampled[l](x, y, 3)) * upsampled[l](x, y, c); - } - - Func normalize("normalize"); - normalize(x, y, c) = interpolated[0](x, y, c) / interpolated[0](x, y, 3); - - Func final("final"); - final(x, y, c) = normalize(x, y, c); - - AUTOTUNE_HOOK(final); - - int sched; - char *target = getenv("HL_TARGET"); - if (target && std::string(target) == "ptx") { - sched = 4; - } else { - sched = 2; - } - - switch (sched) { - case 0: - { - //std::cout << "Flat schedule." 
<< std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - final.compute_root(); - break; - } - case 1: - { - //std::cout << "Flat schedule with vectorization." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } - final.compute_root(); - break; - } - case 2: - { - Var xi, yi; - //std::cout << "Flat schedule with parallelization + vectorization." << std::endl; - clamped.compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - for (unsigned int l = 1; l < levels-1; ++l) { - if (l > 0) downsampled[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].unroll(x, 2).unroll(y, 2); - } - final.reorder(c, x, y).bound(c, 0, 3).parallel(y); - final.tile(x, y, xi, yi, 2, 2).unroll(xi).unroll(yi); - break; - } - case 3: - { - //std::cout << "Flat schedule with vectorization sometimes." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - if (l + 4 < levels) { - Var yo,yi; - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } else { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - } - final.compute_root(); - break; - } - case 4: - { - //std::cout << "GPU schedule." << std::endl; - - // Some gpus don't have enough memory to process the entire - // image, so we process the image in tiles. 
- Var yo, yi, xo, xi; - final.reorder(c, x, y).bound(c, 0, 3).vectorize(x, 4); - final.tile(x, y, xo, yo, xi, yi, input.width()/4, input.height()/4); - normalize.compute_at(final, xo).reorder(c, x, y).cuda_tile(x, y, 16, 16).unroll(c); - - // Start from level 1 to save memory - level zero will be computed on demand - for (unsigned int l = 1; l < levels; ++l) { - int tile_size = 32 >> l; - if (tile_size < 1) tile_size = 1; - if (tile_size > 16) tile_size = 16; - downsampled[l].compute_root().cuda_tile(x, y, c, tile_size, tile_size, 4); - interpolated[l].compute_at(final, xo).cuda_tile(x, y, c, tile_size, tile_size, 4); - } - - break; - } - default: - assert(0 && "No schedule with this number."); - } - - BASELINE_HOOK(final); - -#if 0 - // JIT compile the pipeline eagerly, so we don't interfere with timing - final.compile_jit(); - - // Image<float> in_png = load<float>(argv[1]); - Image<float> out(2048, 2048, 3); - // assert(in_png.channels() == 4); - // input.set(in_png); - final.infer_input_bounds(out); - - std::cout << "Running... " << std::endl; - double min = std::numeric_limits<double>::infinity(); - const unsigned int iters = 20; - - for (unsigned int x = 0; x < iters; ++x) { - double before = now(); - final.realize(out); - double after = now(); - double amt = after - before; - - std::cout << " " << amt * 1000 << std::endl; - if (amt < min) min = amt; - - } - std::cout << " took " << min * 1000 << " msec." 
<< std::endl; - - // vector<Argument> args; - // args.push_back(input); - // final.compile_to_assembly("test.s", args); - // save(out, argv[2]); -#endif -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.settings b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.settings deleted file mode 100644 index 5f22d20f5f9cf355bfd07ca408264f73031a14d0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate-simplest.settings +++ /dev/null @@ -1,124 +0,0 @@ -{ - "functions": [ - { - "calls": [], - "name": "clamped", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "clamped" - ], - "name": "downsampled", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$2" - ], - "name": "downsampled$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled" - ], - "name": "downx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$2" - ], - "name": "interpolated$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled", - "upsampled$2" - ], - "name": "interpolated$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$3" - ], - "name": "normalize", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$2" - ], - "name": "upsampled$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$2" - ], - "name": "upsampledx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "normalize" - ], - "name": "final", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - } - ], - "input_size": "2048, 2048, 3" -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.cpp deleted file mode 100644 index 1ca4ae5bd352fa524ed6aeafbe5795c913e1f6b8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.cpp +++ /dev/null @@ -1,208 +0,0 @@ -#include "Halide.h" - -#define AUTOTUNE_HOOK(x) -#define BASELINE_HOOK(x) - -using namespace Halide; - -#include <iostream> -#include <limits> - -#include <sys/time.h> - -using std::vector; - -double now() { - struct timeval tv; - gettimeofday(&tv, NULL); - static bool first_call = true; - static time_t first_sec = 0; - if (first_call) { - first_call = false; - first_sec = tv.tv_sec; - } - assert(tv.tv_sec >= first_sec); - return (tv.tv_sec - first_sec) + (tv.tv_usec / 1000000.0); -} - -int main(int argc, char **argv) { - ImageParam input(Float(32), 3, "input"); - - const unsigned int levels = 10; - - Func downsampled[levels]; - Func downx[levels]; - Func interpolated[levels]; - Func upsampled[levels]; - Func upsampledx[levels]; - Var x("x"), y("y"), c("c"); - - downsampled[0] = Func("downsampled"); - downx[0] = Func("downx"); - interpolated[0] = Func("interpolated"); - upsampled[0] = Func("upsampled"); - upsampledx[0] = Func("upsampledx"); - - Func clamped("clamped"); - clamped(x, y, c) = input(clamp(x, 0, input.width()-1), clamp(y, 0, input.height()-1), c); - - // This triggers a bug in llvm 3.3 (3.2 and trunk are fine), so we - // rewrite it in a way that doesn't trigger the bug. The rewritten - // form assumes the input alpha is zero or one. 
- // downsampled[0](x, y, c) = select(c < 3, clamped(x, y, c) * clamped(x, y, 3), clamped(x, y, 3)); - downsampled[0](x, y, c) = clamped(x, y, c) * clamped(x, y, 3); - - for (unsigned int l = 1; l < levels; ++l) { - downx[l] = Func("downx"); - downsampled[l] = Func("downsampled"); - downx[l](x, y, c) = (downsampled[l-1](x*2-1, y, c) + - 2.0f * downsampled[l-1](x*2, y, c) + - downsampled[l-1](x*2+1, y, c)) * 0.25f; - downsampled[l](x, y, c) = (downx[l](x, y*2-1, c) + - 2.0f * downx[l](x, y*2, c) + - downx[l](x, y*2+1, c)) * 0.25f; - } - interpolated[levels-1] = Func("interpolated"); - interpolated[levels-1](x, y, c) = downsampled[levels-1](x, y, c); - for (unsigned int l = levels-2; l < levels; --l) { - upsampledx[l] = Func("upsampledx"); - upsampled[l] = Func("upsampled"); - interpolated[l] = Func("interpolated"); - upsampledx[l](x, y, c) = select((x % 2) == 0, - interpolated[l+1](x/2, y, c), - 0.5f * (interpolated[l+1](x/2, y, c) + - interpolated[l+1](x/2+1, y, c))); - upsampled[l](x, y, c) = select((y % 2) == 0, - upsampledx[l](x, y/2, c), - 0.5f * (upsampledx[l](x, y/2, c) + - upsampledx[l](x, y/2+1, c))); - interpolated[l](x, y, c) = downsampled[l](x, y, c) + (1.0f - downsampled[l](x, y, 3)) * upsampled[l](x, y, c); - } - - Func normalize("normalize"); - normalize(x, y, c) = interpolated[0](x, y, c) / interpolated[0](x, y, 3); - - Func final("final"); - final(x, y, c) = normalize(x, y, c); - - AUTOTUNE_HOOK(final); - - int sched; - char *target = getenv("HL_TARGET"); - if (target && std::string(target) == "ptx") { - sched = 4; - } else { - sched = 2; - } - - switch (sched) { - case 0: - { - //std::cout << "Flat schedule." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - final.compute_root(); - break; - } - case 1: - { - //std::cout << "Flat schedule with vectorization." 
<< std::endl; - for (unsigned int l = 0; l < levels; ++l) { - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } - final.compute_root(); - break; - } - case 2: - { - Var xi, yi; - //std::cout << "Flat schedule with parallelization + vectorization." << std::endl; - clamped.compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - for (unsigned int l = 1; l < levels-1; ++l) { - if (l > 0) downsampled[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].compute_root().parallel(y).reorder(c, x, y).reorder_storage(c, x, y).vectorize(c, 4); - interpolated[l].unroll(x, 2).unroll(y, 2); - } - final.reorder(c, x, y).bound(c, 0, 3).parallel(y); - final.tile(x, y, xi, yi, 2, 2).unroll(xi).unroll(yi); - break; - } - case 3: - { - //std::cout << "Flat schedule with vectorization sometimes." << std::endl; - for (unsigned int l = 0; l < levels; ++l) { - if (l + 4 < levels) { - Var yo,yi; - downsampled[l].compute_root().vectorize(x,4); - interpolated[l].compute_root().vectorize(x,4); - } else { - downsampled[l].compute_root(); - interpolated[l].compute_root(); - } - } - final.compute_root(); - break; - } - case 4: - { - //std::cout << "GPU schedule." << std::endl; - - // Some gpus don't have enough memory to process the entire - // image, so we process the image in tiles. 
- Var yo, yi, xo, xi; - final.reorder(c, x, y).bound(c, 0, 3).vectorize(x, 4); - final.tile(x, y, xo, yo, xi, yi, input.width()/4, input.height()/4); - normalize.compute_at(final, xo).reorder(c, x, y).cuda_tile(x, y, 16, 16).unroll(c); - - // Start from level 1 to save memory - level zero will be computed on demand - for (unsigned int l = 1; l < levels; ++l) { - int tile_size = 32 >> l; - if (tile_size < 1) tile_size = 1; - if (tile_size > 16) tile_size = 16; - downsampled[l].compute_root().cuda_tile(x, y, c, tile_size, tile_size, 4); - interpolated[l].compute_at(final, xo).cuda_tile(x, y, c, tile_size, tile_size, 4); - } - - break; - } - default: - assert(0 && "No schedule with this number."); - } - - BASELINE_HOOK(final); - -#if 0 - // JIT compile the pipeline eagerly, so we don't interfere with timing - final.compile_jit(); - - // Image<float> in_png = load<float>(argv[1]); - Image<float> out(2048, 2048, 3); - // assert(in_png.channels() == 4); - // input.set(in_png); - final.infer_input_bounds(out); - - std::cout << "Running... " << std::endl; - double min = std::numeric_limits<double>::infinity(); - const unsigned int iters = 20; - - for (unsigned int x = 0; x < iters; ++x) { - double before = now(); - final.realize(out); - double after = now(); - double amt = after - before; - - std::cout << " " << amt * 1000 << std::endl; - if (amt < min) min = amt; - - } - std::cout << " took " << min * 1000 << " msec." 
<< std::endl; - - // vector<Argument> args; - // args.push_back(input); - // final.compile_to_assembly("test.s", args); - // save(out, argv[2]); -#endif -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.settings b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.settings deleted file mode 100644 index 3a51d8062674581182fb204ddb943f85cd3b4de4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/interpolate.settings +++ /dev/null @@ -1,612 +0,0 @@ -{ - "functions": [ - { - "calls": [], - "name": "clamped", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "clamped" - ], - "name": "downsampled", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$10" - ], - "name": "downsampled$10", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$2" - ], - "name": "downsampled$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$3" - ], - "name": "downsampled$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$4" - ], - "name": "downsampled$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$5" - ], - "name": "downsampled$5", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$6" - ], - "name": "downsampled$6", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$7" - ], - "name": "downsampled$7", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$8" - ], - "name": "downsampled$8", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downx$9" - ], - "name": "downsampled$9", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$9" - ], 
- "name": "downx$10", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled" - ], - "name": "downx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$2" - ], - "name": "downx$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$3" - ], - "name": "downx$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$4" - ], - "name": "downx$5", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$5" - ], - "name": "downx$6", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$6" - ], - "name": "downx$7", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$7" - ], - "name": "downx$8", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$8" - ], - "name": "downx$9", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$2", - "upsampled$9" - ], - "name": "interpolated$10", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled", - "upsampled$10" - ], - "name": "interpolated$11", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$10" - ], - "name": "interpolated$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$9", - "upsampled$2" - ], - "name": "interpolated$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$8", - "upsampled$3" - ], - "name": "interpolated$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$7", - "upsampled$4" - ], - "name": "interpolated$5", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$6", - "upsampled$5" 
- ], - "name": "interpolated$6", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$5", - "upsampled$6" - ], - "name": "interpolated$7", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$4", - "upsampled$7" - ], - "name": "interpolated$8", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "downsampled$3", - "upsampled$8" - ], - "name": "interpolated$9", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$11" - ], - "name": "normalize", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$10" - ], - "name": "upsampled$10", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$2" - ], - "name": "upsampled$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$3" - ], - "name": "upsampled$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$4" - ], - "name": "upsampled$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$5" - ], - "name": "upsampled$5", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$6" - ], - "name": "upsampled$6", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$7" - ], - "name": "upsampled$7", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$8" - ], - "name": "upsampled$8", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "upsampledx$9" - ], - "name": "upsampled$9", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$10" - ], - "name": "upsampledx$10", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$2" - ], - "name": 
"upsampledx$2", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$3" - ], - "name": "upsampledx$3", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$4" - ], - "name": "upsampledx$4", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$5" - ], - "name": "upsampledx$5", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$6" - ], - "name": "upsampledx$6", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$7" - ], - "name": "upsampledx$7", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$8" - ], - "name": "upsampledx$8", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "interpolated$9" - ], - "name": "upsampledx$9", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - }, - { - "calls": [ - "normalize" - ], - "name": "final", - "update_calls": [], - "vars": [ - "x", - "y", - "c" - ] - } - ], - "input_size": "1024, 1024, 3" -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.cpp deleted file mode 100644 index e2cb008790ac0161a2365388744cb9b482a6d7b8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include "Halide.h" - -#define AUTOTUNE_HOOK(x) -#define BASELINE_HOOK(x) - -using namespace Halide; - -Var x("x"), y("y"), c("c"); - -Func haar_x(Func in) { - Func out; - out(x, y, c) = select(c == 0, - (in(2*x, y) + in(2*x+1, y)), - (in(2*x, y) - in(2*x+1, y)))/2; - out.unroll(c, 2); - return out; -} - -Func inverse_haar_x(Func in) { - Func out; - out(x, y) = select(x%2 == 0, - in(x/2, y, 0) + in(x/2, y, 1), - in(x/2, y, 0) - in(x/2, 
y, 1)); - out.unroll(x, 2); - return out; -} - - -const float D0 = 0.4829629131445341f; -const float D1 = 0.83651630373780772f; -const float D2 = 0.22414386804201339f; -const float D3 = -0.12940952255126034f; - -/* -const float D0 = 0.34150635f; -const float D1 = 0.59150635f; -const float D2 = 0.15849365f; -const float D3 = -0.1830127f; -*/ - -Func daubechies_x(Func in) { - Func out; - out(x, y, c) = select(c == 0, - D0*in(2*x-1, y) + D1*in(2*x, y) + D2*in(2*x+1, y) + D3*in(2*x+2, y), - D3*in(2*x-1, y) - D2*in(2*x, y) + D1*in(2*x+1, y) - D0*in(2*x+2, y)); - //out.unroll(c, 2); - return out; -} - -Func inverse_daubechies_x(Func in) { - Func out("inv_daub_x"); - out(x, y) = select(x%2 == 0, - D2*in(x/2, y, 0) + D1*in(x/2, y, 1) + D0*in(x/2+1, y, 0) + D3*in(x/2+1, y, 1), - D3*in(x/2, y, 0) - D0*in(x/2, y, 1) + D1*in(x/2+1, y, 0) - D2*in(x/2+1, y, 1)); - //out.unroll(x, 2); - return out; -} - -int main(int argc, char **argv) { - - ImageParam image(Float(32), 2); - ImageParam wavelet(Float(32), 3); - - // Add a boundary condition for daubechies - Func clamped; - clamped(x, y) = image(clamp(x, 0, image.width()-1), - clamp(y, 0, image.height()-1)); - Func wavelet_clamped("wavelet_clamped"); - wavelet_clamped(x, y, c) = wavelet(clamp(x, 0, wavelet.width()-1), - clamp(y, 0, wavelet.height()-1), c); - - - // Func inv_haar_x = inverse_haar_x(wavelet_clamped); - // inv_haar_x.compile_to_file("inverse_haar_x", wavelet); - - // Func for_haar_x = haar_x(clamped); - // for_haar_x.compile_to_file("haar_x", image); - - Func inv_daub_x = inverse_daubechies_x(wavelet_clamped); - //inv_daub_x.compile_to_file("inverse_daubechies_x", wavelet); - - AUTOTUNE_HOOK(inv_daub_x); - inv_daub_x.unroll(x, 2).vectorize(x, 8).parallel(y); - BASELINE_HOOK(inv_daub_x); - - // Func for_daub_x = daubechies_x(clamped); - //for_daub_x.compile_to_file("daubechies_x", image); - - return 0; -} - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.settings 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.settings deleted file mode 100644 index 6fbb5c4006dd77fee03f8635de2b22079eb0a908..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/apps/wavelet.settings +++ /dev/null @@ -1,4 +0,0 @@ -{"input_size": "2048, 2048", - "functions": [ - {"name": "wavelet_clamped", "vars": ["x", "y", "c"], "calls": []}, - {"name": "inv_daub_x", "vars": ["x", "y"], "calls": ["wavelet_clamped"]}]} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/halidetuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/halidetuner.py deleted file mode 100755 index 08e6732575557e41736fb71a321ee7190e181e7e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/halidetuner.py +++ /dev/null @@ -1,682 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -# -# Example of synthesizing Halide schedules using OpenTuner. This program -# expects a compiled version of Halide to exist at ~/Halide or at the location -# specified by --halide-dir. 
-# -# Halide programs must be modified by: -# 1) Inserting AUTOTUNE_HOOK(Func) directly after the algorithm definition -# in main() -# 2) Creating a settings file that describes the functions and variables -# (see apps/halide_blur.settings for an example) -# -# Halide can be found here: https://github.com/halide/Halide -# - -import adddeps # fix sys.path - -import argparse -import collections -import hashlib -import json -import logging -import math -import os -import re -import subprocess -import tempfile -import textwrap -from cStringIO import StringIO -from fn import _ -from pprint import pprint - -import opentuner -from opentuner.search.manipulator import ConfigurationManipulator -from opentuner.search.manipulator import PowerOfTwoParameter -from opentuner.search.manipulator import PermutationParameter -from opentuner.search.manipulator import BooleanParameter -from opentuner.search.manipulator import ScheduleParameter - - -COMPILE_CMD = ( - '{args.cxx} "{cpp}" -o "{bin}" -I "{args.halide_dir}/include" ' - '"{args.halide_dir}/bin/$BUILD_PREFIX/libHalide.a" -ldl -lcurses -lpthread {args.cxxflags} ' - '-DAUTOTUNE_N="{args.input_size}" -DAUTOTUNE_TRIALS={args.trials} ' - '-DAUTOTUNE_LIMIT={limit} -fno-rtti') - -log = logging.getLogger('halide') - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) -parser.add_argument('source', help='Halide source file annotated with ' - 'AUTOTUNE_HOOK') -parser.add_argument('--halide-dir', default=os.path.expanduser('~/Halide'), - help='Installation directory for Halide') -parser.add_argument('--input-size', - help='Input size to test with') -parser.add_argument('--trials', default=3, type=int, - help='Number of times to test each schedule') -parser.add_argument('--nesting', default=2, type=int, - help='Maximum depth for generated loops') -parser.add_argument('--max-split-factor', default=8, type=int, - help='The largest value a single split() can add') -parser.add_argument('--compile-command', default=COMPILE_CMD, 
- help='How to compile generated C++ code') -parser.add_argument('--cxx', default='c++', - help='C++ compiler to use (e.g., g++ or clang++)') -parser.add_argument('--cxxflags', default='', - help='Extra flags to the C++ compiler') -parser.add_argument('--tmp-dir', - default=('/run/shm' if os.access('/run/shm', os.W_OK) - else '/tmp'), - help='Where to store generated tests') -parser.add_argument('--settings-file', - help='Override location of json encoded settings') -parser.add_argument('--debug-error', - help='Stop on errors matching a given string') -parser.add_argument('--limit', type=float, default=30, - help='Kill compile + runs taking too long (seconds)') -parser.add_argument('--memory-limit', type=int, default=1024 ** 3, - help='Set memory ulimit on unix based systems') -parser.add_argument('--enable-unroll', action='store_true', - help='Enable .unroll(...) generation') -parser.add_argument('--enable-store-at', action='store_true', - help='Never generate .store_at(...)') -parser.add_argument('--gated-store-reorder', action='store_true', - help='Only reorder storage if a special parameter is given') -group = parser.add_mutually_exclusive_group() -group.add_argument('--random-test', action='store_true', - help='Generate a random configuration and run it') -group.add_argument('--random-source', action='store_true', - help='Generate a random configuration and print source ') -group.add_argument('--make-settings-file', action='store_true', - help='Create a skeleton settings file from call graph') - - -# class HalideRandomConfig(opentuner.search.technique.SearchTechnique): -# def desired_configuration(self): -# ''' -# inject random configs with no compute_at() calls to kickstart the search process -# ''' -# cfg = self.manipulator.random() -# for k in cfg.keys(): -# if re.match('.*_compute_level', k): -# cfg[k] = LoopLevel.INLINE -# return cfg -# -# technique.register(bandittechniques.AUCBanditMetaTechnique([ -# HalideRandomConfig(), -# 
differentialevolution.DifferentialEvolutionAlt(), -# evolutionarytechniques.UniformGreedyMutation(), -# evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3), -# ], name = "HalideMetaTechnique")) - - -class HalideTuner(opentuner.measurement.MeasurementInterface): - def __init__(self, args): - # args.technique = ['HalideMetaTechnique'] - super(HalideTuner, self).__init__(args, program_name=args.source) - timing_prefix = open(os.path.join(os.path.dirname(__file__), - 'timing_prefix.h')).read() - self.template = timing_prefix + open(args.source).read() - self.min_collection_cost = float('inf') - if not args.settings_file: - args.settings_file = os.path.splitext(args.source)[0] + '.settings' - if not args.make_settings_file: - with open(args.settings_file) as fd: - self.settings = json.load(fd) - self.post_dominators = post_dominators(self.settings) - if not args.input_size: - args.input_size = self.settings['input_size'] - else: - self.settings = None - self.post_dominators = None - args.input_size = '1, 1' - # set "program_version" based on hash of halidetuner.py, program source - h = hashlib.md5() - #with open(__file__) as src: - # h.update(src.read()) - with open(args.source) as src: - h.update(src.read()) - self._version = h.hexdigest() - - def compute_order_parameter(self, func): - name = func['name'] - schedule_vars = [] - schedule_deps = dict() - for var in func['vars']: - schedule_vars.append((var, 0)) - for i in xrange(1, self.args.nesting): - schedule_vars.append((var, i)) - schedule_deps[(var, i - 1)] = [(var, i)] - return ScheduleParameter('{0}_compute_order'.format(name), schedule_vars, - schedule_deps) - - def manipulator(self): - """ - The definition of the manipulator is meant to mimic the Halide::Schedule - data structure and defines the configuration space to search - """ - manipulator = HalideConfigurationManipulator(self) - manipulator.add_parameter(HalideComputeAtScheduleParameter( - 'schedule', self.args, self.settings['functions'], - 
self.post_dominators)) - for func in self.settings['functions']: - name = func['name'] - manipulator.add_parameter(PermutationParameter( - '{0}_store_order'.format(name), func['vars'])) - manipulator.add_parameter( - BooleanParameter('{0}_store_order_enabled'.format(name))) - manipulator.add_parameter(self.compute_order_parameter(func)) - for var in func['vars']: - manipulator.add_parameter(PowerOfTwoParameter( - '{0}_vectorize'.format(name), 1, self.args.max_split_factor)) - manipulator.add_parameter(PowerOfTwoParameter( - '{0}_unroll'.format(name), 1, self.args.max_split_factor)) - manipulator.add_parameter(BooleanParameter( - '{0}_parallel'.format(name))) - for nesting in xrange(1, self.args.nesting): - manipulator.add_parameter(PowerOfTwoParameter( - '{0}_splitfactor_{1}_{2}'.format(name, nesting, var), - 1, self.args.max_split_factor)) - - return manipulator - - def cfg_to_schedule(self, cfg): - """ - Produce a Halide schedule from a configuration dictionary - """ - o = StringIO() - cnt = 0 - temp_vars = list() - schedule = ComputeAtStoreAtParser(cfg['schedule'], self.post_dominators) - compute_at = schedule.compute_at - store_at = schedule.store_at - - # build list of all used variable names - var_names = dict() - var_name_order = dict() - for func in self.settings['functions']: - name = func['name'] - compute_order = cfg['{0}_compute_order'.format(name)] - for var in func['vars']: - var_names[(name, var, 0)] = var - for nesting in xrange(1, self.args.nesting): - split_factor = cfg.get('{0}_splitfactor_{1}_{2}'.format( - name, nesting, var), 0) - if split_factor > 1 and (name, var, nesting - 1) in var_names: - var_names[(name, var, nesting)] = '_{var}{cnt}'.format( - func=name, var=var, nesting=nesting, cnt=cnt) - temp_vars.append(var_names[(name, var, nesting)]) - cnt += 1 - var_name_order[name] = [var_names[(name, v, n)] for v, n in compute_order - if (name, v, n) in var_names] - - # set a schedule for each function - for func in self.settings['functions']: 
- name = func['name'] - inner_var_name = var_name_order[name][-1] # innermost variable in the reordered list for this func - vectorize = cfg['{0}_vectorize'.format(name)] - if self.args.enable_unroll: - unroll = cfg['{0}_unroll'.format(name)] - else: - unroll = 1 - - print >> o, 'Halide::Func(funcs["%s"])' % name - - for var in func['vars']: - # handle all splits - for nesting in xrange(1, self.args.nesting): - split_factor = cfg.get('{0}_splitfactor_{1}_{2}'.format( - name, nesting, var), 0) - if split_factor <= 1: - break - - for nesting2 in xrange(nesting + 1, self.args.nesting): - split_factor2 = cfg.get('{0}_splitfactor_{1}_{2}'.format( - name, nesting2, var), 0) - if split_factor2 <= 1: - break - split_factor *= split_factor2 - var_name = var_names[(name, var, nesting)] - last_var_name = var_names[(name, var, nesting - 1)] - - # apply unroll, vectorize factors to all surrounding splits iff we're the innermost var - if var_name == inner_var_name: - split_factor *= unroll - split_factor *= vectorize - - print >> o, '.split({0}, {0}, {1}, {2})'.format( - last_var_name, var_name, split_factor) - - # drop unused variables and truncate (Halide supports only 10 reorders) - if len(var_name_order[name]) > 1: - print >> o, '.reorder({0})'.format( - ', '.join(reversed(var_name_order[name][:10]))) - - # reorder_storage - store_order_enabled = cfg['{0}_store_order_enabled'.format(name)] - if store_order_enabled or not self.args.gated_store_reorder: - store_order = cfg['{0}_store_order'.format(name)] - if len(store_order) > 1: - print >> o, '.reorder_storage({0})'.format(', '.join(store_order)) - - if unroll > 1: - # apply unrolling to innermost var - print >> o, '.unroll({0}, {1})'.format( - var_name_order[name][-1], unroll * vectorize) - - if vectorize > 1: - # apply vectorization to innermost var - print >> o, '.vectorize({0}, {1})'.format( - var_name_order[name][-1], vectorize) - - # compute_at(not root) - if (compute_at[name] is not None and - 
len(var_name_order[compute_at[name][0]]) >= compute_at[name][1]): - at_func, at_idx = compute_at[name] - try: - at_var = var_name_order[at_func][-at_idx] - print >> o, '.compute_at(Halide::Func(funcs["{0}"]), {1})'.format(at_func, at_var) - if not self.args.enable_store_at: - pass # disabled - elif store_at[name] is None: - print >> o, '.store_root()' - elif store_at[name] != compute_at[name]: - at_func, at_idx = store_at[name] - at_var = var_name_order[at_func][-at_idx] - print >> o, '.store_at(Halide::Func(funcs["{0}"]), {1})'.format(at_func, at_var) - except IndexError: - # this is expected when at_idx is too large - # TODO: implement a cleaner fix - pass - # compute_root - else: - parallel = cfg['{0}_parallel'.format(name)] - if parallel: - # only apply parallelism to outermost var of root funcs - print >> o, '.parallel({0})'.format(var_name_order[name][0]) - print >> o, '.compute_root()' - - print >> o, ';' - - if temp_vars: - return 'Halide::Var {0};\n{1}'.format( - ', '.join(temp_vars), o.getvalue()) - else: - return o.getvalue() - - def schedule_to_source(self, schedule): - """ - Generate a temporary Halide cpp file with schedule inserted - """ - - def repl_autotune_hook(match): - tmpl = ''' - { - std::map<std::string, Halide::Internal::Function> funcs = Halide::Internal::find_transitive_calls((%(func)s).function()); - - %(sched)s - - _autotune_timing_stub(%(func)s); - }''' - return tmpl % {"sched": schedule.replace('\n', '\n '), "func": match.group(1)} - - source = re.sub(r'\n\s*AUTOTUNE_HOOK\(\s*([a-zA-Z0-9_]+)\s*\)', - repl_autotune_hook, self.template) - return source - - def run_schedule(self, schedule, limit): - """ - Generate a temporary Halide cpp file with schedule inserted and run it - with our timing harness found in timing_prefix.h. 
- """ - return self.run_source(self.schedule_to_source(schedule), limit) - - def run_baseline(self): - """ - Generate a temporary Halide cpp file with schedule inserted and run it - with our timing harness found in timing_prefix.h. - """ - - def repl_autotune_hook(match): - return '\n\n_autotune_timing_stub(%s);' % match.group(1) - - source = re.sub(r'\n\s*BASELINE_HOOK\(\s*([a-zA-Z0-9_]+)\s*\)', - repl_autotune_hook, self.template) - return self.run_source(source) - - def run_source(self, source, limit=0, extra_args=''): - cmd = '' - with tempfile.NamedTemporaryFile(suffix='.cpp', prefix='halide', - dir=self.args.tmp_dir) as cppfile: - cppfile.write(source) - cppfile.flush() - # binfile = os.path.splitext(cppfile.name)[0] + '.bin' - # binfile = '/tmp/halide.bin' - binfile = '' - with tempfile.NamedTemporaryFile(suffix='.bin', prefix='halide', - dir=self.args.tmp_dir, delete=False) as binfiletmp: - - binfile = binfiletmp.name # unique temp file to allow multiple concurrent tuner runs - assert(binfile) - cmd = self.args.compile_command.format( - cpp=cppfile.name, bin=binfile, args=self.args, - limit=math.ceil(limit) if limit < float('inf') else 0) - cmd += ' ' + extra_args - compile_result = self.call_program(cmd, limit=self.args.limit, - memory_limit=self.args.memory_limit) - if compile_result['returncode'] != 0: - log.error('compile failed: %s', compile_result) - return None - - try: - result = self.call_program(binfile, - limit=self.args.limit, - memory_limit=self.args.memory_limit) - stdout = result['stdout'] - stderr = result['stderr'] - returncode = result['returncode'] - - if result['timeout']: - log.info('compiler timeout %d (%.2f+%.0f cost)', self.args.limit, - compile_result['time'], self.args.limit) - return float('inf') - elif returncode == 142 or returncode == -14: - log.info('program timeout %d (%.2f+%.2f cost)', math.ceil(limit), - compile_result['time'], result['time']) - return None - elif returncode != 0: - log.error('invalid schedule 
(returncode=%d): %s', returncode, - stderr.strip()) - with tempfile.NamedTemporaryFile(suffix='.cpp', prefix='halide-error', - dir=self.args.tmp_dir, delete=False) as errfile: - errfile.write(source) - log.error('failed schedule logged to %s.\ncompile as `%s`.', errfile.name, cmd) - if self.args.debug_error is not None and ( - self.args.debug_error in stderr - or self.args.debug_error == ""): - self.debug_schedule('/tmp/halideerror.cpp', source) - return None - else: - try: - time = json.loads(stdout)['time'] - except: - log.exception('error parsing output: %s', result) - return None - log.info('success: %.4f (collection cost %.2f + %.2f)', - time, compile_result['time'], result['time']) - self.min_collection_cost = min( - self.min_collection_cost, result['time']) - return time - finally: - os.unlink(binfile) - - def run_cfg(self, cfg, limit=0): - try: - schedule = self.cfg_to_schedule(cfg) - except: - log.exception('error generating schedule') - return None - return self.run_schedule(schedule, limit) - - def run(self, desired_result, input, limit): - time = self.run_cfg(desired_result.configuration.data, limit) - if time is not None: - return opentuner.resultsdb.models.Result(time=time) - else: - return opentuner.resultsdb.models.Result(state='ERROR', - time=float('inf')) - - def save_final_config(self, configuration): - """called at the end of tuning""" - print 'Final Configuration:' - print self.cfg_to_schedule(configuration.data) - - def debug_log_schedule(self, filename, source): - open(filename, 'w').write(source) - print 'offending schedule written to {0}'.format(filename) - - def debug_schedule(self, filename, source): - self.debug_log_schedule(filename, source) - raw_input('press ENTER to continue') - - def make_settings_file(self): - dump_call_graph_dir = os.path.join(os.path.dirname(__file__), - 'dump-call-graph') - if not os.path.isdir(dump_call_graph_dir): - subprocess.check_call(['git', 'clone', - 'http://github.com/halide/dump-call-graph.git']) - 
assert os.path.isdir(dump_call_graph_dir) - - dump_call_graph_cpp = os.path.join(dump_call_graph_dir, 'DumpCallGraph.cpp') - callgraph_file = self.args.settings_file + '.callgraph' - - def repl_autotune_hook(match): - return r'''dump_call_graph("%s", %s); - printf("{\"time\": 0}\n"); - exit(0);''' % (callgraph_file, match.group(1)) - - source = re.sub(r'\n\s*AUTOTUNE_HOOK\(\s*([a-zA-Z0-9_]+)\s*\)', - repl_autotune_hook, self.template) - # TODO: BUG! - this only works correctly if given an absolute path to the - # program (or explicit settings file). Otherwise it generates the callgraph - # in a tmp dir somewhere and fails to find it in a local path here. - source = open(dump_call_graph_cpp).read() + source - self.run_source(source, extra_args='-I{0}'.format(dump_call_graph_dir)) - callgraph = json.load(open(callgraph_file)) - settings = {'input_size': '1024, 1024', 'functions': callgraph} - json.dump(settings, open(self.args.settings_file, 'w'), sort_keys=True, - indent=2) - print textwrap.dedent(''' - - {0} has been generated based on call graph of program. - - This file likely needs some manual tweaks in order to work correctly. - The input size should be changed to have the right number of dimensions. - Any naming differences between variable names and function names must - be applied manually. Some temporary variables not in the source code - need to be manually removed. 
- - '''.format(self.args.settings_file)) - - -class ComputeAtStoreAtParser(object): - """ - A recursive descent parser to force proper loop nesting, and enforce post - dominator scheduling constraints - - For each function input will have tokens like: - ('foo', 's') = store_at location for foo - ('foo', '2'), ('foo', '1') = opening the loop nests for foo, - the inner 2 variables - ('foo', 'c') = the computation of foo, and closing all loop nests - - The order of these tokens define a loop nest tree which we reconstruct - """ - - def __init__(self, tokens, post_dominators): - self.tokens = list(tokens) # input, processed back to front - self.post_dominators = post_dominators - self.compute_at = dict() - self.store_at = dict() - self.process_root() - - def process_root(self): - old_len = len(self.tokens) - out = [] - while self.tokens: - if self.tokens[-1][1] == 's': - # store at root - self.store_at[self.tokens[-1][0]] = None - out.append(self.tokens.pop()) - else: - self.process_loopnest(out, []) - self.tokens = list(reversed(out)) - assert old_len == len(self.tokens) - - def process_loopnest(self, out, stack): - func, idx = self.tokens[-1] - out.append(self.tokens.pop()) - if idx != 'c': - raise Exception('Invalid schedule') - - self.compute_at[func] = None - for targ_func, targ_idx in reversed(stack): - if targ_func in self.post_dominators[func]: - self.compute_at[func] = (targ_func, targ_idx) - break - - close_tokens = [(f, i) for f, i in self.tokens if f == func and i != 's'] - while close_tokens: - if self.tokens[-1] == close_tokens[-1]: - # proper nesting - close_tokens.pop() - out.append(self.tokens.pop()) - elif self.tokens[-1][1] == 'c': - self.process_loopnest(out, stack + close_tokens[-1:]) - elif self.tokens[-1][1] == 's': - # self.tokens[-1] is computed at this level - if func in self.post_dominators[self.tokens[-1][0]]: - self.store_at[self.tokens[-1][0]] = close_tokens[-1] - else: - self.store_at[self.tokens[-1][0]] = None - 
out.append(self.tokens.pop()) - else: - # improper nesting, just close the loop and search/delete close_tokens - out.extend(reversed(close_tokens)) - self.tokens = [x for x in self.tokens if x not in close_tokens] - break - - -class HalideConfigurationManipulator(ConfigurationManipulator): - def __init__(self, halide_tuner): - super(HalideConfigurationManipulator, self).__init__() - self.halide_tuner = halide_tuner - - def hash_config(self, config): - """ - Multiple configs can lead to the same schedule, so we provide a custom - hash function that hashes the resulting schedule instead of the raw config. - This will lead to fewer duplicate tests. - """ - self.normalize(config) - try: - schedule = self.halide_tuner.cfg_to_schedule(config) - return hashlib.sha256(schedule).hexdigest() - except: - log.warning('error hashing config', exc_info=True) - return super(HalideConfigurationManipulator, self).hash_config(config) - - -class HalideComputeAtScheduleParameter(ScheduleParameter): - def __init__(self, name, args, functions, post_dominators): - """ - Custom ScheduleParameter that normalizes using ComputeAtStoreAtParser - """ - super(HalideComputeAtScheduleParameter, self).__init__( - name, *self.gen_nodes_deps(args, functions)) - self.post_dominators = post_dominators - - def gen_nodes_deps(self, args, functions): - """ - Compute the list of nodes and point-to-point deps to provide to base class - """ - nodes = list() - deps = collections.defaultdict(list) - for func in functions: - last = None - for idx in reversed(['c'] + # 'c' = compute location (and close loops) - range(1, len(func['vars']) * args.nesting + 1) + - ['s']): # 's' = storage location - name = (func['name'], idx) - if last is not None: - # variables must go in order - deps[last].append(name) - last = name - nodes.append(name) - if idx == 'c': - # computes must follow call graph order - for callee in func['calls']: - deps[(callee, 'c')].append(name) - return nodes, deps - - def normalize(self, cfg): - 
""" - First enforce basic point-to-point deps (in base class), then call - ComputeAtStoreAtParser to normalize schedule. - """ - super(HalideComputeAtScheduleParameter, self).normalize(cfg) - cfg[self.name] = ComputeAtStoreAtParser(cfg[self.name], - self.post_dominators).tokens - - -def post_dominators(settings): - """ - Compute post dominator tree using textbook iterative algorithm for the - call graph defined in settings - """ - functions = [f['name'] for f in settings['functions']] - calls = dict([(f['name'], set(f['calls'])) for f in settings['functions']]) - inverse_calls = collections.defaultdict(set) - for k, callees in calls.items(): - for v in callees: - inverse_calls[v].add(k) - dom = {functions[-1]: set([functions[-1]])} - for f in functions[:-1]: - dom[f] = set(functions) - change = True - while change: - change = False - for f in functions[:-1]: - old = dom[f] - dom[f] = set([f]) | reduce( - _ & _, [dom[c] for c in inverse_calls[f]], set(functions)) - if old != dom[f]: - change = True - return dom - - -def random_test(args): - """ - Generate and run a random schedule - """ - - opentuner.tuningrunmain.init_logging() - m = HalideTuner(args) - cfg = m.manipulator().random() - pprint(cfg) - print - schedule = m.cfg_to_schedule(cfg) - print schedule - print - print 'Schedule', m.run_schedule(schedule, 30) - print 'Baseline', m.run_baseline() - - -def random_source(args): - """ - Dump the source code of a random schedule - """ - opentuner.tuningrunmain.init_logging() - m = HalideTuner(args) - cfg = m.manipulator().random() - schedule = m.cfg_to_schedule(cfg) - source = m.schedule_to_source(schedule) - print source - - -def main(args): - if args.random_test: - random_test(args) - elif args.random_source: - random_source(args) - elif args.make_settings_file: - opentuner.tuningrunmain.init_logging() - HalideTuner(args).make_settings_file() - else: - HalideTuner.main(args) - - -if __name__ == '__main__': - main(parser.parse_args()) diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/timing_prefix.h b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/timing_prefix.h deleted file mode 100644 index d8bbc5f57b6177f3a88a28d57fef2d72bf8c3050..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/halide/timing_prefix.h +++ /dev/null @@ -1,100 +0,0 @@ -#include <Halide.h> -#include <stdio.h> -#include <sys/time.h> -#include <unistd.h> - -#include <map> -#include <string> - -// How many times to run (and take min) -// #define AUTOTUNE_TRIALS 3 - -// Limit in seconds to try running for (0 = no limit) -// #define AUTOTUNE_LIMIT 0 - -// Size to run with -// #define AUTOTUNE_N 1024, 1024 - -inline void _autotune_timing_stub(Halide::Func& func) { - func.compile_jit(); - - // TODO: this assumes scalar/non-Tuple outputs - should generalize to a Realization - std::vector<Halide::Type> out_types = func.output_types(); - std::vector<buffer_t> out_raw_bufs; - std::vector<Halide::Buffer> out_bufs; - - for (int i = 0; i < out_types.size(); i++) { - // Use the Buffer constructor as a helper to set up the buffer_t, - // but then throw away its allocation which we don't really want. - Halide::Buffer bufinit(out_types[i], AUTOTUNE_N); - out_raw_bufs.push_back(*bufinit.raw_buffer()); - out_raw_bufs[i].host = NULL; - // TODO: free the host pointer?! 
- out_bufs.push_back(Halide::Buffer(out_types[i], &out_raw_bufs[i])); - assert(out_bufs[i].host_ptr() == NULL); // make sure we don't have an allocation - } - Halide::Realization output(out_bufs); - func.infer_input_bounds(output); - // assert(output[0].host_ptr()); // for now, the API doesn't seem to allocate outputs - - // TODO: this should go into Func::infer_input_bounds(Realization) - for (int i = 0; i < output.size(); i++) { - assert(!output[i].host_ptr()); // for now, the API doesn't seem to allocate outputs - buffer_t buf = *output[i].raw_buffer(); - - // Figure out how much memory to allocate for this buffer - size_t min_idx = 0, max_idx = 0; - for (int d = 0; d < 4; d++) { - if (buf.stride[d] > 0) { - min_idx += buf.min[d] * buf.stride[d]; - max_idx += (buf.min[d] + buf.extent[d] - 1) * buf.stride[d]; - } else { - max_idx += buf.min[d] * buf.stride[d]; - min_idx += (buf.min[d] + buf.extent[d] - 1) * buf.stride[d]; - } - } - size_t total_size = (max_idx - min_idx); - while (total_size & 0x1f) total_size++; - - // Allocate enough memory with the right dimensionality. - Halide::Buffer buffer(output[i].type(), total_size, - buf.extent[1] > 0 ? 1 : 0, - buf.extent[2] > 0 ? 1 : 0, - buf.extent[3] > 0 ? 
1 : 0); - - // Rewrite the buffer fields to match the ones returned - for (int d = 0; d < 4; d++) { - buffer.raw_buffer()->min[d] = buf.min[d]; - buffer.raw_buffer()->stride[d] = buf.stride[d]; - buffer.raw_buffer()->extent[d] = buf.extent[d]; - } - - output[i] = buffer; - } - - timeval t1, t2; - double rv = 0; - const unsigned int timeout = AUTOTUNE_LIMIT; - alarm(timeout); - for (int i = 0; i < AUTOTUNE_TRIALS; i++) { - gettimeofday(&t1, NULL); - func.realize(output); - gettimeofday(&t2, NULL); - alarm(0); // disable alarm - double t = (t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec)/1000000.0; - if(i == 0 || t < rv) - rv = t; - } - printf("{\"time\": %.10f}\n", rv); - exit(0); -} - - -#ifndef AUTOTUNE_HOOK -#define AUTOTUNE_HOOK(x) -#endif - -#ifndef BASELINE_HOOK -#define BASELINE_HOOK(x) -#endif - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/HPL.dat.mako b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/HPL.dat.mako deleted file mode 100644 index 93354a2292a3bb3ddec1e2278e49b039b72d6bb1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/HPL.dat.mako +++ /dev/null @@ -1,31 +0,0 @@ -HPLinpack benchmark input file -Innovative Computing Laboratory, University of Tennessee -HPL.out output file name (if any) -0 device out (6=stdout,7=stderr,file) -1 # of problems sizes (N) -${size} Ns -1 # of NBs -${blocksize} NBs -${row_or_colmajor_pmapping} PMAP process mapping (0=Row-,1=Column-major) -1 # of process grids (P x Q) -2 Ps PxQ must equal nprocs -2 Qs -16.0 threshold -1 # of panel fact -${pfact} PFACTs (0=left, 1=Crout, 2=Right) -1 # of recursive stopping criterium -${nbmin} NBMINs (>= 1) -1 # of panels in recursion -${ndiv} NDIVs -1 # of recursive panel fact. 
-${rfact} RFACTs (0=left, 1=Crout, 2=Right) -1 # of broadcast -${bcast} BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) -1 # of lookahead depth -${depth} DEPTHs (>=0) -${swap} SWAP (0=bin-exch,1=long,2=mix) -${swapping_threshold} swapping threshold (default had 64) -${L1_transposed} L1 in (0=transposed,1=no-transposed) form -${U_transposed} U in (0=transposed,1=no-transposed) form -1 Equilibration (0=no,1=yes) -${mem_alignment} memory alignment in double (> 0) (4,8,16) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/hpl.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/hpl.py deleted file mode 100644 index 4cbbe798249b61eae23b5142337a056ef58e83bd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/hpl/hpl.py +++ /dev/null @@ -1,98 +0,0 @@ -import adddeps #fix sys.path - -import argparse -import logging - -import opentuner -from opentuner.search.manipulator import (ConfigurationManipulator, - IntegerParameter, - FloatParameter) -from opentuner.search.objective import MinimizeTime -from opentuner.measurement import MeasurementInterface -from opentuner.measurement.inputmanager import FixedInputManager -from opentuner.tuningrunmain import TuningRunMain - -log = logging.getLogger(__name__) - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) - -parser.add_argument('--size', 
type=int, default=800, - help='dimensions for the HPL matrix') -parser.add_argument('--nprocs', type=int, default=4, - help='number of processors for each HPL run (minimum=4)') -parser.add_argument('--xhpl', type=str, default="hpl-2.1/bin/OSX/xhpl", - help='location of xhpl binary') - -class HPLinpack(MeasurementInterface): - def run(self, desired_result, input, limit): - self.output_hpl_datfile(desired_result.configuration.data) - import subprocess, os - binary = self.args.xhpl - subprocess.call(["mpirun", "-np", str(self.args.nprocs), binary]) - - val = self.get_time_from_hpl_output() - - return opentuner.resultsdb.models.Result(time=val) - - def manipulator(self): - #FIXME: should some of these be expressed as booleans or switch parameters? - #FIXME: how to express P and Q, given PxQ=nprocs, with nprocs being fixed? - #FIXME: how to express logscaled parameter with a particular base? - manipulator = ConfigurationManipulator() - manipulator.add_parameter(IntegerParameter("blocksize", 1, 64)) - manipulator.add_parameter(IntegerParameter("row_or_colmajor_pmapping", 0, 1)) - manipulator.add_parameter(IntegerParameter("pfact", 0, 2)) - manipulator.add_parameter(IntegerParameter("nbmin", 1, 4)) - manipulator.add_parameter(IntegerParameter("ndiv", 2, 2)) - manipulator.add_parameter(IntegerParameter("rfact", 0, 4)) - manipulator.add_parameter(IntegerParameter("bcast", 0, 5)) - manipulator.add_parameter(IntegerParameter("depth", 0, 4)) - manipulator.add_parameter(IntegerParameter("swap", 0, 2)) - manipulator.add_parameter(IntegerParameter("swapping_threshold", 64, 128)) - manipulator.add_parameter(IntegerParameter("L1_transposed", 0, 1)) - manipulator.add_parameter(IntegerParameter("U_transposed", 0, 1)) - manipulator.add_parameter(IntegerParameter("mem_alignment", 4, 16)) - - return manipulator - - def output_hpl_datfile(self, params): - """HPL uses an input file to express the parameters, and this uses mako to render it.""" - params["size"] = self.args.size - from 
mako.template import Template - template = Template(filename="HPL.dat.mako") - with open("HPL.dat", "w") as f: - f.write(template.render(**params)) - - def get_time_from_hpl_output(self, fname="HPL.out"): - """Returns the elapsed time only, from the HPL output file""" - #FIXME: clean up with REs - elapsed = 0.0 - with open(fname) as f: - line = f.readline() - while (line[0:3] != "T/V"): - line = f.readline() - line = f.readline() - while (line[0:3] != "T/V"): - line = f.readline() - f.readline() # line of dashes - splitted = f.readline().split() - elapsed = float(splitted[5]) - - return elapsed - - - def program_name(self): - return "HPL" - - def program_version(self): - return "size=%d,nprocs=%d" % (self.args.size, self.args.nprocs) - - def save_final_config(self, configuration): - ''' - called at the end of autotuning with the best resultsdb.models.Configuration - ''' - print "Final configuration", configuration.data - -if __name__ == '__main__': - args = parser.parse_args() - HPLinpack.main(args) \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/README.md b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/README.md deleted file mode 100644 index f094e987f5e48d72aef426b39ef268e86f47e3c0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/README.md +++ /dev/null @@ -1,33 +0,0 @@ -This is an OpenTuner-based tuner that learns a series of button presses that complete the first level of Super Mario Bros. for the original Nintendo Entertainment System. - -## Dependencies - -- FCEUX, a NES emulator -- `xvfb-run`, to run the emulator headless (optional, but speeds up tuning) -- Super Mario Bros., assumed to be named `smb.nes`, which we can't help you get for legal reasons - -## Running - -Run the tuner with `./mario.py --technique=PSO_GA_Bandit`; it will launch FCEUX to run trials. 
You can experiment with other techniques or `--parallelism` (the number of trials to run in parallel) too. - -You can implement your own configuration representation by subclassing Representation and passing `--representation=YourRepresentation`. Your Representation class needs to provide a ConfigurationManipulator populated with parameters and a method to translate these parameters to button presses. There are already a few representations implemented to use as examples. - -You can implement your own fitness function by subclassing FitnessFunction and passing `--fitness-function=YourFunction`. Your function receives a win/loss boolean, the number of pixels moved to the right when the trial ended, and the number of frames that elapsed during the trial. Lower fitness scores are better. There are a few existing fitness functions; in particular, `ProgressTimesAverageSpeed` also tries to optimize speed. - -If you want to watch the trials (or don't have `xvfb-run` available), pass `--headful`. - -## Playing the results - -When a tuning run completes, the best configuration (as judged by the fitness function) is written to `<hostname>-<tuningrun>.fm2`. This file can be played back in FCEUX to watch the best configuration. - -You can also use the `--tuning-run=` option (passing the tuning run number in the best configuration `.fm2`) to generate a new-bests `.fm2`, which will contain each tuning trial that was the best configuration found so far during the tuning run, concatenated back-to-back. You also need to pass `--database` pointing to the database containing that tuning run, and if you passed `--representation` or `--fitness-function` during the tuning run, you need to pass the same values for those parameters. So your final command might look like `./mario.py --tuning-run=42 --database=opentuner.db/hostname.db --representation=NaiveRepresentation --fitness-function=ProgressTimesAverageSpeed > new-bests-42.fm2`. 
- -## TODO - -- use the [fm2 format](http://www.fceux.com/web/help/fceux.html?fm2.html)'s subtitle support in new-bests movies to show run number and fitness score - -## Links - -- [Videos showing OpenTuner playing Super Mario Bros](https://www.youtube.com/playlist?list=PLngnz1zPEA08FWy8wF9JbGqjlm-elHmlb) -- [Slides describing representation and results](http://groups.csail.mit.edu/commit/papers/2014/ansel-pact14-opentuner-slides.pdf) (see slide 16) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/fceux-hook.lua b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/fceux-hook.lua deleted file mode 100644 index ce00288149936154f5dc64f94cdbcbcba04d4758..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/fceux-hook.lua +++ /dev/null @@ -1,25 +0,0 @@ -player_state_addr = 0x000E; -player_state_dying = 6; -player_float_addr = 0x001D; -player_float_flagpole = 3; -player_page_addr = 0x006D; -player_horizpos_addr = 0x0086; -minimum_frames = 197; - -emu.speedmode("maximum"); -while true do - if (emu.framecount() > minimum_frames) then - --dead? - local dead = memory.readbyte(player_state_addr) == player_state_dying; - --flagpole? 
- local won = memory.readbyte(player_float_addr) == player_float_flagpole; - if (dead or won) then - local str = (dead and "died" or "won"); - local x_pos = math.floor(memory.readbyteunsigned(player_page_addr)*256 + memory.readbyteunsigned(player_horizpos_addr)); - local framecount = emu.framecount(); - io.write(str, " ", x_pos, " ", framecount, "\n"); - os.exit(0); - end; - end; - emu.frameadvance(); -end diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/mario.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/mario.py deleted file mode 100755 index d388321e1f3b4f70a5d1262f43b89702ea924c79..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/mario/mario.py +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env python2 - -"""OpenTuner plays Super Mario Bros. for NES - -We write a movie file and ask the emulator to play it back while running -fceux-hook.lua, which checks for death/flagpole and prints the fitness to -stdout where OpenTuner, as the parent process, can read it. 
-""" - -import adddeps #fix sys.path -import argparse -import base64 -import pickle -import tempfile -import subprocess -import re -import zlib -import abc -import sys -import os -import traceback -import collections -import socket - -import opentuner -from opentuner.search.manipulator import ConfigurationManipulator, IntegerParameter, EnumParameter, BooleanParameter -from opentuner.measurement import MeasurementInterface -from opentuner.measurement.inputmanager import FixedInputManager -from opentuner.tuningrunmain import TuningRunMain -from opentuner.search.objective import MinimizeTime - -def instantiate(class_name): - return getattr(sys.modules[__name__], class_name)() - -argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) -argparser.add_argument('--tuning-run', type=int, help='concatenate new bests from given tuning run into single movie') -argparser.add_argument('--headful', action='store_true', help='run headful (not headless) for debugging or live demo') -argparser.add_argument('--xvfb-delay', type=int, default=0, help='delay between launching xvfb and fceux') -argparser.add_argument('--representation', default='DurationRepresentation', type=instantiate, help='name of representation class') -argparser.add_argument('--fitness-function', default='Progress', type=instantiate, help='name of fitness function class') - -def call_or_die(command, failmsg=None): - try: - p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - return stdout, stderr, p.returncode - except: - print "Failed to execute", command - traceback.print_exc() - print "Child traceback:" - print sys.exc_info()[1].child_traceback - if failmsg: - print failmsg - sys.exit(1) - -# Functions for building FCEUX movie files (.fm2 files) - -def fm2_line(up, down, left, right, a, b, start, select, reset=False): - """formats one frame of input with the given button presses""" - return ''.join(('|1|' if reset else '|0|') + - ('R' if 
right else '.') + - ('L' if left else '.') + - ('D' if down else '.') + - ('U' if up else '.') + - ('T' if start else '.') + - ('D' if select else '.') + - ('B' if b else '.') + - ('A' if a else '.') + - '|........||') - -def maxd(iterable, default): - try: - return max(iterable) - except ValueError: - return default - -def fm2_lines(up, down, left, right, a, b, start, select, reset=set(), minFrame=None, maxFrame=None): - """formats many frames using the given button-press sets""" - if minFrame is None: - minFrame = 0 - if maxFrame is None: - maxFrame = max(maxd(up, 0), maxd(down, 0), maxd(left, 0), maxd(right, 0), maxd(a, 0), maxd(b, 0), maxd(start, 0), maxd(select, 0), maxd(reset, 0)) + 1 - lines = list() - for i in xrange(minFrame, maxFrame): - lines.append(fm2_line(i in up, i in down, i in left, i in right, i in a, i in b, i in start, i in select, i in reset)) - return lines - -def fm2_smb_header(): - return ["version 3", - "emuVersion 9828", - "romFilename smb.nes", - "romChecksum base64:jjYwGG411HcjG/j9UOVM3Q==", - "guid 51473540-E9D7-11E3-ADFC-46CE3219C4E0", - "fourscore 0", - "port0 1", - "port1 1", - "port2 0"] - -def fm2_smb(left, right, down, b, a, header=True, padding=True, minFrame=None, maxFrame=None): - reset = set() - start = set() - if padding: - left = set([x+196 for x in left]) - right = set([x+196 for x in right]) - down = set([x+196 for x in down]) - b = set([x+196 for x in b]) - a = set([x+196 for x in a]) - reset.add(0) - start.add(33) - lines = fm2_lines(set(), down, left, right, a, b, start, set(), reset, minFrame, maxFrame) - if header: - return "\n".join(fm2_smb_header() + lines) - else: - return "\n".join(lines) - -display_numbers = collections.deque() - -def run_movie(fm2, args): - with tempfile.NamedTemporaryFile(suffix=".fm2", delete=True) as f: - f.write(fm2) - f.flush() - cmd = [] - if not args.headful: - display = display_numbers.pop() - cmd += ["xvfb-run", "-n", display, "-w", str(args.xvfb_delay), "-e", "/dev/stderr"] - cmd += 
["fceux", "--playmov", f.name, "--loadlua", - "fceux-hook.lua", "--nogui", "--volume", "0", "--no-config", "1", - "smb.nes"] - stdout, stderr, returncode = call_or_die(cmd) - if not args.headful: - display_numbers.append(display) - match = re.search(r"^(won|died) (\d+) (\d+)$", stdout, re.MULTILINE) - if not match: - print stderr - print stdout - raise ValueError - wl = match.group(1) - x_pos = int(match.group(2)) - framecount = int(match.group(3)) - return (wl, x_pos, framecount) - -class Representation(object): - """Interface for pluggable tuning representations.""" - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def manipulator(): - """Return a ConfigurationManipulator for this representation.""" - pass - - @abc.abstractmethod - def interpret(cfg): - """Unpack this representation into button-press sets (L, R, D, B, A).""" - pass - -class NaiveRepresentation(Representation): - """Uses a parameter per (button, frame) pair.""" - def manipulator(self): - m = ConfigurationManipulator() - for i in xrange(0, 12000): - m.add_parameter(BooleanParameter('L{}'.format(i))) - m.add_parameter(BooleanParameter('R{}'.format(i))) - m.add_parameter(BooleanParameter('D{}'.format(i))) - m.add_parameter(BooleanParameter('B{}'.format(i))) - m.add_parameter(BooleanParameter('A{}'.format(i))) - return m - - def interpret(self, cfg): - left = set() - right = set() - down = set() - running = set() - jumping = set() - for i in xrange(0, 12000): - if cfg['L{}'.format(i)]: - left.add(i) - if cfg['R{}'.format(i)]: - right.add(i) - if cfg['D{}'.format(i)]: - down.add(i) - if cfg['B{}'.format(i)]: - running.add(i) - if cfg['A{}'.format(i)]: - jumping.add(i) - return left, right, down, running, jumping - -class DurationRepresentation(Representation): - def manipulator(self): - m = ConfigurationManipulator() - for i in xrange(0, 1000): - #bias 3:1 in favor of moving right - m.add_parameter(EnumParameter('move{}'.format(i), ["R", "L", "RB", "LB", "N", "LR", "LRB", "R2", "RB2", "R3", 
"RB3"])) - m.add_parameter(IntegerParameter('move_duration{}'.format(i), 1, 60)) - #m.add_parameter(BooleanParameter("D"+str(i))) - for i in xrange(0, 1000): - m.add_parameter(IntegerParameter('jump_frame{}'.format(i), 0, 24000)) - m.add_parameter(IntegerParameter('jump_duration{}'.format(i), 1, 32)) - return m - - def interpret(self, cfg): - left = set() - right = set() - down = set() - running = set() - start = 0 - for i in xrange(0, 1000): - move = cfg['move{}'.format(i)] - move_duration = cfg['move_duration{}'.format(i)] - if "R" in move: - right.update(xrange(start, start + move_duration)) - if "L" in move: - left.update(xrange(start, start + move_duration)) - if "B" in move: - running.update(xrange(start, start + move_duration)) - start += move_duration - jumping = set() - for i in xrange(0, 1000): - jump_frame = cfg['jump_frame{}'.format(i)] - jump_duration = cfg['jump_duration{}'.format(i)] - jumping.update(xrange(jump_frame, jump_frame + jump_duration)) - return left, right, down, running, jumping - -class AlphabetRepresentation(Representation): - def manipulator(self): - m = ConfigurationManipulator() - for i in xrange(0, 400*60): - m.add_parameter(EnumParameter('{}'.format(i), xrange(0, 16))) - return m - - def interpret(self, cfg): - left = set() - right = set() - down = set() - running = set() - jumping = set() - for i in xrange(0, 400*60): - bits = cfg[str(i)] - if bits & 1: - left.add(i) - if bits & 2: - right.add(i) - if bits & 4: - running.add(i) - if bits & 8: - jumping.add(i) - #if bits & 16: - # down.add(i) - return left, right, down, running, jumping - -class FitnessFunction(object): - """Interface for pluggable fitness functions.""" - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def __call__(won, x_pos, elapsed_frames): - """Return the fitness (float, lower is better).""" - pass - -class Progress(FitnessFunction): - def __call__(self, won, x_pos, elapsed_frames): - return -float(x_pos) - -class 
ProgressPlusTimeRemaining(FitnessFunction): - def __call__(self, won, x_pos, elapsed_frames): - """x_pos plus 1 for each frame remaining on the timer on a win. This results in a large discontinuity at wins. This was the fitness function used for the OpenTuner paper, though the paper only discussed time-to-first-win.""" - return -float(x_pos + 400*60 - elapsed_frames) if won else -float(x_pos) - -class ProgressTimesAverageSpeed(FitnessFunction): - def __call__(self, won, x_pos, elapsed_frames): - return -x_pos * (float(x_pos)/elapsed_frames) - -class SMBMI(MeasurementInterface): - def __init__(self, args): - super(SMBMI, self).__init__(args) - self.parallel_compile = True - self.args = args - - def manipulator(self): - return self.args.representation.manipulator() - - def compile(self, cfg, id): - left, right, down, running, jumping = self.args.representation.interpret(cfg) - fm2 = fm2_smb(left, right, down, running, jumping) - try: - wl, x_pos, framecount = run_movie(fm2, self.args) - except ValueError: - return opentuner.resultsdb.models.Result(state='ERROR', time=float('inf')) - print wl, x_pos, framecount - return opentuner.resultsdb.models.Result(state='OK', time=self.args.fitness_function("won" in wl, x_pos, framecount)) - - def run_precompiled(self, desired_result, input, limit, compile_result, id): - return compile_result - - def run(self, desired_result, input, limit): - pass - - def save_final_config(self, cfg): - left, right, down, running, jumping = args.representation.interpret(cfg.data) - fm2 = fm2_smb(left, right, down, running, jumping) - _, _, framecount = run_movie(fm2, self.args) - filename = '{}-{}.fm2'.format(socket.gethostname(), self.driver.tuning_run.id) - with open(filename, 'w') as f: - f.write(fm2_smb(left, right, down, running, jumping, maxFrame=framecount)) - -def new_bests_movie(args): - stdout, stderr, returncode = call_or_die(["sqlite3", args.database, "select configuration_id from result where tuning_run_id = %d and was_new_best = 1 
order by collection_date;" % args.tuning_run]) - if returncode: - print "Error retrieving new-best configurations:", stderr - sys.exit(1) - cids = stdout.split() - print '\n'.join(fm2_smb_header()) - for cid in cids: - stdout, stderr, returncode = call_or_die(["sqlite3", args.database, "select quote(data) from configuration where id = %d;" % int(cid)]) - if returncode: - print "Error retriving configuration data:", cid, stderr - sys.exit(1) - cfg = pickle.loads(zlib.decompress(base64.b16decode(stdout.strip()[2:-1]))) - left, right, down, running, jumping = args.representation.interpret(cfg) - fm2 = fm2_smb(left, right, down, running, jumping) - _, _, framecount = run_movie(fm2, args) - print fm2_smb(left, right, down, running, jumping, header=False, maxFrame=framecount) - -if __name__ == '__main__': - args = argparser.parse_args() - call_or_die(["fceux", "--help"], failmsg="Is fceux on your PATH?") - if not args.headful: - call_or_die(["xvfb-run", "--help"], failmsg="Is xvfb-run on your PATH? 
(or, pass --headful)") - for n in xrange(99, 99 + args.parallelism): - display_numbers.append(str(n)) - if args.tuning_run: - call_or_die(["sqlite3", "-version"], failmsg="Is sqlite3 on your PATH?") - if args.database is not None: - new_bests_movie(args) - else: - print "must specify --database" - else: - if os.path.isfile('smb.nes'): - SMBMI.main(args) - else: - print "smb.nes not found" - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/.gitignore deleted file mode 100644 index a6e67132d61e0bd837b953376dc866031d5f742a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/.gitignore +++ /dev/null @@ -1 +0,0 @@ -linux_x86_64 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/README.md b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/README.md deleted file mode 100644 index e4b446468658463cab275862143c95add38d0eb5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/README.md +++ /dev/null @@ -1,24 +0,0 @@ -Source for PetaBricks binaries can be found at: - - https://github.com/petabricks/petabricks/ - - https://code.google.com/p/petabricks/ - - -Basic usage for running the raw programs is: -``` -./Prog --config=CONFIG -n=N --time --accuracy --max-sec=TIMEOUT --trials=1 - ---config=<STRING> - filename of the program configuration (see example in .cfg.default file) ---n=<INTEGER> - generate a random input of the given size and run it ---time - print timing results in xml format ---accuracy - print out accuracy of answer ---max-sec=<NUMBER> (default: 1.79769e+308) - terminate measurement if it exceeds the given number of seconds - -many more options are given by running ./Prog --help -``` - - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/deps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/deps.py deleted file mode 100644 index c03a106a85827c1c4faed505b78d4f18e168c7e9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/deps.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import sys - -project_root = os.path.normpath(os.path.join( - os.path.dirname(os.path.abspath(__file__)), '../..')) -sys.path.insert(0, project_root) - - -try: - from lxml import etree -except ImportError: - try: - # Python 2.5 - import xml.etree.cElementTree as etree - except ImportError: - import xml.etree.ElementTree as etree - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/download_benchmarks.sh b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/download_benchmarks.sh deleted file mode 100755 index aaf333b455a0414575b338625e45b58db8188c5b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/download_benchmarks.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -if test -e linux_x86_64 -then - echo "benchmarks already downloaded" -else - wget -O- http://people.csail.mit.edu/jansel/petabricks_benchmarks_linux_x86_64.tar.bz2 | tar jxv -fi - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/import_old_result.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/import_old_result.py deleted file mode 100755 index 9add4ed035c50e2f7f21dc79f6af89571137b257..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/import_old_result.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python - -import adddeps # fix sys.path - -import argparse -import json -import logging -import os -import re -import sys -import uuid -import subprocess - -try: - from lxml import etree -except ImportError: - import xml.etree.ElementTree as etree - -import opentuner -from opentuner import resultsdb -from datetime import datetime -from datetime import timedelta -from opentuner.search.objective import ThresholdAccuracyMinimizeTime - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser() -argparser.add_argument('--database', default='opentuner.db/import.db') -argparser.add_argument('--limit', type=float, default=10) -argparser.add_argument('program') -argparser.add_argument('candidatelog') - - -def run(args, cfg): - limit = args.limit - cmd = [args.program, - '--time', - '--accuracy', - '--config=' + cfg, - '--max-sec=%.10f' % args.limit, - '-n=%d' % args.n] - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - - result = opentuner.resultsdb.models.Result() - try: - root = etree.XML(out) - result.time = float(root.find('stats/timing').get('average')) - result.accuracy = float(root.find('stats/accuracy').get('average')) - if result.time < limit + 3600: - result.state = 'OK' - else: - # time will be 2**31 if timeout - result.state = 'TIMEOUT' - except: - log.exception('run error') - log.warning('program crash, out = %s / err = %s', out, err) - result.state = 'ERROR' - result.time = float('inf') - result.accuracy = float('-inf') - return result - - -def 
main(args): - if '://' not in args.database: - args.database = 'sqlite:///' + args.database - engine, Session = opentuner.resultsdb.connect(args.database) - session = Session() - - program_settings = json.load(open(args.program + '.settings')) - args.n = program_settings['n'] - args.technique = ['Imported'] - objective = ThresholdAccuracyMinimizeTime(program_settings['accuracy']) - - tuningrun = resultsdb.models.TuningRun( - uuid=uuid.uuid4().hex, - name='import', - args=args, - start_date=datetime.now(), - objective=objective, - program_version=resultsdb.models.ProgramVersion.get( - session, 'PetaBricksInterface', args.program, 'imported'), - state='COMPLETE', - ) - session.add(tuningrun) - - for gen, line in enumerate(open(args.candidatelog)): - if line[0] != '#': - line = re.split('\t', line) - date = tuningrun.start_date + timedelta(seconds=float(line[0])) - cfg = os.path.normpath( - os.path.join(os.path.dirname(args.candidatelog), '..', line[5])) - result = run(args, cfg) - result.was_new_best = True - result.tuning_run = tuningrun - result.collection_date = date - session.add(result) - desired_result = resultsdb.models.DesiredResult( - limit=args.limit, - tuning_run=tuningrun, - generation=gen, - requestor='Imported', - request_date=date, - start_date=date, - result=result, - state='COMPLETE') - session.add(desired_result) - tuningrun.end_date = date - print gen, date, result.time - - session.commit() - - -if __name__ == '__main__': - opentuner.tuningrunmain.init_logging() - sys.exit(main(argparser.parse_args())) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/pbtuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/pbtuner.py deleted file mode 100755 index 7163294494eec42b73a232b6ee00fb7164d2c691..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/pbtuner.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python - -import adddeps # fix 
sys.path - -import re -import argparse -import logging -import subprocess -import tempfile -import json -from pprint import pprint - -import opentuner -from opentuner.search.manipulator import (ConfigurationManipulator, - IntegerParameter, - LogIntegerParameter, - FloatParameter, - LogFloatParameter, - SelectorParameter, - SwitchParameter, - PermutationParameter, ) - -try: - from lxml import etree -except ImportError: - import xml.etree.ElementTree as etree - -from opentuner.measurement import MeasurementInterface -from opentuner.measurement.inputmanager import FixedInputManager -from opentuner.search.objective import ThresholdAccuracyMinimizeTime - -log = logging.getLogger("pbtuner") - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) -parser.add_argument('program', - help='PetaBricks binary program to autotune') -parser.add_argument('--program-cfg-default', - help="override default program config exemplar location") -parser.add_argument('--program-cfg-output', - help="location final autotuned configuration is written") -parser.add_argument('--program-settings', - help="override default program settings file location") -parser.add_argument('--program-input', - help="use only a given input for autotuning") -parser.add_argument('--upper-limit', type=float, default=30, - help="time limit to apply to initial test") -parser.add_argument('--test-config', action='store_true') - - -class PetaBricksInterface(MeasurementInterface): - def __init__(self, args): - self.program_settings = json.load(open(args.program_settings)) - input_manager = FixedInputManager(size=self.program_settings['n']) - objective = ThresholdAccuracyMinimizeTime(self.program_settings['accuracy']) - - # pass many settings to parent constructor - super(PetaBricksInterface, self).__init__( - args, program_name=args.program, - program_version=self.file_hash(args.program), - input_manager=input_manager, objective=objective) - - def build_config(self, cfg): - r = dict() - - # direct copy - 
for k, v in cfg.iteritems(): - if k[0] != '.': - r[k] = v - - for name, choices in self.choice_sites.items(): - param = self.manipulator.parameters_dict(cfg)['.' + name] - lvl = 0 - for cutoff, choice in param.selector_iter(cfg): - lvl += 1 - r['%s_lvl%d_rule' % (name, lvl)] = choice - if lvl > 1: - r['%s_lvl%d_cutoff' % (name, lvl)] = cutoff - - return r - - def run(self, desired_result, input, limit): - limit = min(limit, self.args.upper_limit) - with tempfile.NamedTemporaryFile(suffix='.petabricks.cfg') as cfgtmp: - for k, v in self.build_config(desired_result.configuration.data).items(): - print >> cfgtmp, k, '=', v - cfgtmp.flush() - if args.program_input: - input_opts = ['--iogen-run=' + args.program_input, - '--iogen-n=%d' % input.input_class.size] - else: - input_opts = ['-n=%d' % input.input_class.size] - - cmd = [args.program, - '--time', - '--accuracy', - '--max-sec=%.8f' % limit, - '--config=' + cfgtmp.name] + input_opts - log.debug("cmd: %s", ' '.join(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - - result = opentuner.resultsdb.models.Result() - try: - root = etree.XML(out) - result.time = float(root.find('stats/timing').get('average')) - result.accuracy = float(root.find('stats/accuracy').get('average')) - if result.time < limit + 3600: - result.state = 'OK' - else: - #time will be 2**31 if timeout - result.state = 'TIMEOUT' - except: - log.warning("program crash, out = %s / err = %s", out, err) - result.state = 'ERROR' - result.time = float('inf') - result.accuracy = float('-inf') - return result - - def save_final_config(self, configuration): - """ - called at the end of autotuning with the best - resultsdb.models.Configuration - """ - with open(args.program_cfg_output, 'w') as fd: - cfg = self.build_config(configuration.data) - for k, v in sorted(cfg.items()): - print >> fd, k, '=', v - log.info("final configuration written to %s", args.program_cfg_output) - - def manipulator(self): 
- """create the configuration manipulator, from example config""" - upper_limit = self.program_settings['n'] + 1 - cfg = open(self.args.program_cfg_default).read() - manipulator = ConfigurationManipulator() - - self.choice_sites = dict() - - for m in re.finditer(r" *([a-zA-Z0-9_-]+)[ =]+([0-9e.+-]+) *" - r"[#] *([a-z]+).* ([0-9]+) to ([0-9]+)", cfg): - k, v, valtype, minval, maxval = m.group(1, 2, 3, 4, 5) - minval = float(minval) - maxval = float(maxval) - if upper_limit: - maxval = min(maxval, upper_limit) - assert valtype == 'int' - #log.debug("param %s %f %f", k, minval, maxval) - - m1 = re.match(r'(.*)_lvl[0-9]+_rule', k) - m2 = re.match(r'(.*)_lvl[0-9]+_cutoff', k) - if m1: - self.choice_sites[m1.group(1)] = int(maxval) - elif m2: - pass - elif k == 'worker_threads': - manipulator.add_parameter(IntegerParameter(k, 1, 16)) - elif k == 'distributedcutoff': - pass - elif minval == 0 and maxval < 64: - manipulator.add_parameter(SwitchParameter(k, maxval)) - else: - manipulator.add_parameter(LogIntegerParameter(k, minval, maxval)) - - for name, choices in self.choice_sites.items(): - manipulator.add_parameter( - SelectorParameter('.' 
+ name, range(choices + 1), - upper_limit / choices)) - - self.manipulator = manipulator - return manipulator - - def test_config(self): - pprint(self.manipulator().random()) - - -if __name__ == '__main__': - args = parser.parse_args() - if not args.program_cfg_default: - args.program_cfg_default = args.program + '.cfg.default' - if not args.program_cfg_output: - args.program_cfg_output = args.program + '.cfg' - if not args.program_settings: - args.program_settings = args.program + '.settings' - if args.test_config: - PetaBricksInterface(args).test_config() - else: - PetaBricksInterface.main(args) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/testwrapper.sh b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/testwrapper.sh deleted file mode 100755 index 2b6a94e57a6b4205638dd0560da79482494ac20b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/petabricks/testwrapper.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -COUNT=50 -for Z in `seq $COUNT` -do - for T in `./pbtuner.py --list-techniques $@`; - do - echo $Z/$COUNT $T - ./pbtuner.py --technique=$T $@ - done -done - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/api_example.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/api_example.py deleted file mode 
100755 index e87a8fffe1544714247b4435a3b5ed7d3f92eb03..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/api_example.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -""" -Examples usage of a Python API interface to opentuner. - -Unlike the other examples, this code lets the user control the main() of -the program and calls into opentuner to get new configurations to test. -""" - -import adddeps # add opentuner to path in dev mode - -import opentuner -from opentuner.api import TuningRunManager -from opentuner.measurement.interface import DefaultMeasurementInterface -from opentuner.resultsdb.models import Result -from opentuner.search.manipulator import ConfigurationManipulator -from opentuner.search.manipulator import IntegerParameter -import logging -import argparse - -log = logging.getLogger(__name__) - - -def test_func(cfg): - x = cfg['x'] - y = (x - 10) * (x - 10) - log.debug("f({}) -> {}".format(x, y)) - return y - - -def main(): - parser = argparse.ArgumentParser(parents=opentuner.argparsers()) - args = parser.parse_args() - manipulator = ConfigurationManipulator() - manipulator.add_parameter(IntegerParameter('x', -200, 200)) - interface = DefaultMeasurementInterface(args=args, - manipulator=manipulator, - project_name='examples', - program_name='api_test', - program_version='0.1') - api = TuningRunManager(interface, args) - for x in xrange(500): - desired_result = api.get_next_desired_result() - if desired_result is None: - # The search space for this example is very small, so sometimes - # the techniques have trouble finding a config that hasn't already - # been tested. Change this to a continue to make it try again. 
- break - cfg = desired_result.configuration.data - result = Result(time=test_func(cfg)) - api.report_result(desired_result, result) - - best_cfg = api.get_best_configuration() - api.finish() - print 'best x found was', best_cfg['x'] - -if __name__ == '__main__': - main() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/multiple_tuning_runs.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/multiple_tuning_runs.py deleted file mode 100755 index 5e0918e3afe49ce7a819f36312d770cdb73a5003..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/py_api/multiple_tuning_runs.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/python -""" -Examples usage of a Python API interface to opentuner. - -Unlike the other examples, this code lets the user control the main() of -the program and calls into opentuner to get new configurations to test. - -This version runs multiple tuning runs at once in a single process. 
-""" - -import adddeps # add opentuner to path in dev mode - -import opentuner -from opentuner.api import TuningRunManager -from opentuner.measurement.interface import DefaultMeasurementInterface -from opentuner.resultsdb.models import Result -from opentuner.search.manipulator import ConfigurationManipulator -from opentuner.search.manipulator import IntegerParameter -import logging -import argparse - -log = logging.getLogger(__name__) - - -def test_func1(cfg): - x = cfg['x'] - y = (x - 10) * (x - 10) - log.debug("f({}) -> {}".format(x, y)) - return y - - -def test_func2(cfg): - x = cfg['x'] - y = (x + 10) * (x + 10) - log.debug("f({}) -> {}".format(x, y)) - return y - - -def test_func3(cfg): - x = cfg['x'] - y = (x + 20) * (x + 20) - log.debug("f({}) -> {}".format(x, y)) - return y - - -def create_test_tuning_run(db): - parser = argparse.ArgumentParser(parents=opentuner.argparsers()) - args = parser.parse_args() - args.database = db - manipulator = ConfigurationManipulator() - manipulator.add_parameter(IntegerParameter('x', -200, 200)) - interface = DefaultMeasurementInterface(args=args, - manipulator=manipulator, - project_name='examples', - program_name='api_test', - program_version='0.1') - api = TuningRunManager(interface, args) - return api - - -def main(): - apis = [create_test_tuning_run('sqlite:////tmp/a.db'), - create_test_tuning_run('sqlite:////tmp/b.db'), - create_test_tuning_run('sqlite:////tmp/c.db')] - test_funcs = [test_func1, test_func2, test_func3] - for x in xrange(100): - for api, test_func in zip(apis, test_funcs): - desired_result = api.get_next_desired_result() - if desired_result is None: - continue - cfg = desired_result.configuration.data - result = Result(time=test_func(cfg)) - api.report_result(desired_result, result) - - best_cfgs = [api.get_best_configuration() for api in apis] - for api in apis: - api.finish() - - print('best x configs: {}'.format(best_cfgs)) - -if __name__ == '__main__': - main() - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/.gitignore b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/.gitignore deleted file mode 100644 index aa0571caf15bdf4665fee72a1d87051d12718127..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -rosenbrock.db -*.db -opentuner.log diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.makefile b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.makefile deleted file mode 100755 index 7b9be87a9a8ba698083ad4ac2c228ed3f11ed8df..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.makefile +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/make -f -# use -j4 to run in parallel - -FN := rosenbrock -DIMS := 4 -TECHNIQUES := $(shell ./rosenbrock.py --list-techniques) -define test_loop -DB="sqlite:///opentuner.db/$$RUN.db"; \ -for TEQ in $(TECHNIQUES); do \ - ./rosenbrock.py --function=$(FN) \ - --technique=$$TEQ \ - --dimensions=$(DIMS) \ - --database=$$DB; \ -done; -endef - -default: run.1 run.2 run.3 run.4 run.5 run.6 run.7 run.8 run.9 run.10 run.11 \ -run.12 run.13 run.14 run.15 run.16 run.17 run.18 run.19 run.20 run.21 run.22 \ -run.23 
run.24 run.25 run.26 run.27 run.28 run.29 run.30 - -run.%: - RUN=$* $(test_loop) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.py deleted file mode 100755 index da426f239bdcdd945eca8630db0c96a8a60544d6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/rosenbrock/rosenbrock.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python -# -# This is a simple testcase purely for testing the autotuner -# -# http://en.wikipedia.org/wiki/Rosenbrock_function -# -# Also supports some other test functions taken from: -# http://en.wikipedia.org/wiki/Test_functions_for_optimization -# - -import adddeps # fix sys.path - -import argparse -import logging - -import opentuner -from opentuner.measurement import MeasurementInterface -from opentuner.search.manipulator import ConfigurationManipulator -from opentuner.search.manipulator import FloatParameter - -log = logging.getLogger(__name__) - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) -parser.add_argument('--dimensions', type=int, default=2, - help='dimensions for the Rosenbrock function') -parser.add_argument('--domain', type=float, default=1000, - help='bound for variables in each dimension') -parser.add_argument('--function', default='rosenbrock', - choices=('rosenbrock', 'sphere', 'beale'), - help='function to use') - - -class Rosenbrock(MeasurementInterface): - def run(self, desired_result, input, limit): - cfg = desired_result.configuration.data - val = 0.0 - if self.args.function == 'rosenbrock': - # the actual rosenbrock function: - for d in xrange(self.args.dimensions - 1): - x0 = cfg[d] - x1 = cfg[d + 1] - val += 100.0 * (x1 - x0 ** 2) ** 2 + (x0 - 1) ** 2 - elif self.args.function == 'sphere': - for d in xrange(self.args.dimensions): - xi = cfg[d] - val += xi ** 2 - elif self.args.function == 'beale': - assert 
self.args.dimensions == 2 - assert self.args.domain == 4.5 - x = cfg[0] - y = cfg[1] - val = ((1.5 - x + x * y) ** 2 + - (2.25 - x + x * y ** 2) ** 2 + - (2.625 - x + x * y ** 3) ** 2) - return opentuner.resultsdb.models.Result(time=val) - - def manipulator(self): - manipulator = ConfigurationManipulator() - for d in xrange(self.args.dimensions): - manipulator.add_parameter(FloatParameter(d, - -self.args.domain, - self.args.domain)) - return manipulator - - def program_name(self): - return self.args.function - - def program_version(self): - return "%dx%d" % (self.args.dimensions, self.args.domain) - - def save_final_config(self, configuration): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - -if __name__ == '__main__': - args = parser.parse_args() - if args.function == 'beale': - # fixed for this function - args.domain = 4.5 - args.dimensions = 2 - Rosenbrock.main(args) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/att48_d.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/att48_d.txt deleted file mode 100644 index b93e36ccfa194c574fd9473921fcee2d6820015c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/att48_d.txt +++ /dev/null @@ -1,48 +0,0 @@ - 0 4727 1205 6363 3657 3130 2414 
563 463 5654 1713 1604 2368 2201 1290 1004 3833 2258 3419 2267 2957 720 1700 5279 2578 6076 3465 2654 3625 3115 1574 3951 1748 2142 6755 2383 3306 1029 3530 825 2188 4820 3489 1947 6835 1542 2379 3744 - 4727 0 3588 2012 1842 6977 6501 5187 5028 2327 4148 4723 3635 3125 4907 3930 7463 6338 7243 5105 4043 4022 3677 2863 3106 1850 7173 6630 1204 6814 6001 3447 5253 2656 3123 6274 7183 5622 3085 4564 2756 1591 7027 6186 3472 5461 4390 2088 - 1205 3588 0 5163 2458 3678 3071 1742 1444 4462 1184 1520 1498 1103 1501 951 4298 2903 3967 2169 2209 652 828 4136 1518 4873 3954 3254 2446 3581 2441 2960 1966 950 5564 2916 3878 2035 2482 1027 1395 3617 3891 2686 5661 2023 1867 2560 - 6363 2012 5163 0 2799 8064 7727 6878 6581 1402 5366 5946 4679 4378 6225 5709 8417 7578 8296 6135 4802 5707 4982 2322 4178 320 8186 7800 2778 7859 7408 3763 6461 4223 1427 7451 8263 7131 3669 6011 4638 1681 7987 7502 1877 6758 5360 2844 - 3657 1842 2458 2799 0 5330 4946 4200 3824 2012 2573 3157 1924 1580 3427 3179 5749 4793 5577 3409 2223 3066 2185 1860 1401 2491 5486 5035 894 5141 4611 1669 3677 1590 3113 4682 5533 4352 1252 3227 2426 1169 5313 4706 3241 3962 2651 304 - 3130 6977 3678 8064 5330 0 743 3209 2670 6929 2831 2266 3407 3854 2178 4076 727 881 293 1930 3310 3672 3315 6199 3932 7745 365 482 5774 261 1659 4513 1746 4431 7910 769 207 2225 4435 2681 5053 6384 550 1224 7805 1670 2704 5230 - 2414 6501 3071 7727 4946 743 0 2468 1952 6673 2380 1795 3051 3405 1604 3382 1469 168 1020 1681 3110 2993 2827 6009 3552 7412 1104 267 5300 821 916 4348 1270 3890 7698 332 900 1484 4185 2049 4415 6051 1219 482 7635 1054 2432 4884 - 563 5187 1742 6878 4200 3209 2468 0 718 6203 2241 2051 2920 2762 1687 1304 3932 2331 3487 2669 3487 1175 2260 5840 3141 6596 3563 2728 4120 3240 1559 4507 2082 2658 7304 2512 3364 985 4091 1319 2544 5358 3632 1987 7391 1785 2879 4296 - 463 5028 1444 6581 3824 2670 1952 718 0 5789 1602 1343 2330 2291 970 1451 3376 1796 2959 1951 2835 1112 1725 5346 2628 6285 3007 2193 3889 2661 1122 
3920 1372 2391 6883 1927 2845 611 3543 676 2590 4993 3039 1486 6934 1112 2196 3876 - 5654 2327 4462 1402 2012 6929 6673 6203 5789 0 4392 4947 3648 3501 5274 5183 7216 6535 7140 5022 3621 5077 4090 922 3207 1131 7014 6714 2437 6707 6477 2476 5432 3599 1102 6376 7121 6284 2497 5160 4318 937 6795 6507 1268 5773 4249 1914 - 1713 4148 1184 5366 2573 2831 2380 2241 1602 4392 0 586 766 1029 883 2040 3353 2224 3100 1049 1246 1625 503 3841 1196 5054 3042 2488 2945 2676 2087 2331 1114 1650 5459 2132 3037 1958 1997 931 2513 3701 2923 2137 5459 1394 711 2534 - 1604 4723 1520 5946 3157 2266 1795 2051 1343 4947 586 0 1299 1612 406 2208 2824 1639 2542 694 1586 1767 1050 4357 1770 5633 2498 1907 3520 2128 1558 2778 531 2171 6003 1552 2472 1538 2506 791 2912 4277 2403 1564 5983 827 892 3109 - 2368 3635 1498 4679 1924 3407 3051 2920 2330 3648 766 1299 0 646 1642 2446 3840 2905 3655 1488 730 2096 697 3076 533 4363 3567 3122 2453 3219 2842 1592 1791 1480 4706 2772 3610 2721 1232 1656 2550 3001 3403 2860 4697 2126 756 1836 - 2201 3125 1103 4378 1580 3854 3405 2762 2291 3501 1029 1612 646 0 1853 2026 4349 3247 4119 1997 1341 1753 606 3078 419 4070 4052 3517 1923 3690 3032 1866 2142 838 4593 3161 4060 2788 1380 1663 1932 2736 3915 3138 4647 2395 1351 1592 - 1290 4907 1501 6225 3427 2178 1604 1687 970 5274 883 406 1642 1853 0 2029 2803 1438 2466 986 1987 1593 1253 4716 2072 5915 2454 1764 3710 2082 1204 3164 497 2287 6342 1419 2379 1134 2867 554 2885 4569 2405 1289 6338 555 1297 3406 - 1004 3930 951 5709 3179 4076 3382 1304 1451 5183 2040 2208 2446 2026 2029 0 4759 3220 4368 2900 3151 442 1765 4960 2444 5443 4396 3610 2932 4034 2572 3891 2525 1590 6278 3313 4261 2033 3398 1476 1241 4287 4390 2928 6419 2428 2749 3337 - 3833 7463 4298 8417 5749 727 1469 3932 3376 7216 3353 2824 3840 4349 2803 4759 0 1601 477 2359 3617 4345 3851 6433 4372 8098 370 1206 6267 726 2384 4754 2335 4991 8148 1452 609 2949 4752 3331 5687 6746 437 1948 8005 2334 3098 5618 - 2258 6338 2903 7578 4793 881 168 2331 1796 
6535 2224 1639 2905 3247 1438 3220 1601 0 1165 1563 2988 2829 2666 5882 3401 7263 1233 399 5138 923 794 4227 1117 3724 7565 286 1049 1348 4051 1881 4248 5903 1322 355 7508 887 2302 4736 - 3419 7243 3967 8296 5577 293 1020 3487 2959 7140 3100 2542 3655 4119 2466 4368 477 1165 0 2170 3520 3965 3588 6393 4183 7977 202 767 6041 438 1932 4706 2027 4711 8107 1061 132 2503 4652 2972 5344 6617 486 1501 7989 1962 2939 5469 - 2267 5105 2169 6135 3409 1930 1681 2669 1951 5022 1049 694 1488 1997 986 2900 2359 1563 2170 0 1430 2460 1547 4333 2019 5817 2079 1694 3910 1733 1813 2668 654 2694 6029 1366 2130 1991 2525 1474 3542 4455 1923 1641 5957 1071 777 3302 - 2957 4043 2209 4802 2223 3310 3110 3487 2835 3621 1246 1586 730 1341 1987 3151 3617 2988 3520 1430 0 2779 1387 2905 1062 4482 3398 3119 2922 3087 3115 1240 1953 2175 4607 2796 3501 3119 1136 2173 3268 3136 3189 3029 4527 2355 711 2042 - 720 4022 652 5707 3066 3672 2993 1175 1112 5077 1625 1767 2096 1753 1593 442 4345 2829 3965 2460 2779 0 1401 4781 2166 5427 3984 3212 2946 3620 2224 3603 2089 1496 6178 2906 3861 1719 3132 1040 1479 4211 3969 2553 6290 2012 2336 3189 - 1700 3677 828 4982 2185 3315 2827 2260 1725 4090 503 1050 697 606 1253 1765 3851 2666 3588 1547 1387 1401 0 3621 903 4675 3537 2954 2475 3169 2427 2254 1578 1148 5177 2598 3521 2194 1833 1074 2054 3340 3423 2541 5213 1801 1077 2190 - 5279 2863 4136 2322 1860 6199 6009 5840 5346 922 3841 4357 3076 3078 4716 4960 6433 5882 6393 4333 2905 4781 3621 0 2718 2042 6254 6024 2569 5966 5913 1687 4807 3384 1716 5699 6384 5787 1852 4687 4285 1272 6022 5892 1629 5178 3581 1639 - 2578 3106 1518 4178 1401 3932 3552 3141 2628 3207 1196 1770 533 419 2072 2444 4372 3401 4183 2019 1062 2166 903 2718 0 3864 4097 3635 1932 3748 3274 1448 2284 1164 4286 3283 4136 3086 967 1973 2285 2507 3935 3331 4312 2589 1284 1340 - 6076 1850 4873 320 2491 7745 7412 6596 6285 1131 5054 5633 4363 4070 5915 5443 8098 7263 7977 5817 4482 5427 4675 2042 3864 0 7866 7483 2515 7539 7101 3449 6146 
3938 1375 7134 7944 6831 3349 5709 4397 1363 7667 7190 1798 6446 5041 2528 - 3465 7173 3954 8186 5486 365 1104 3563 3007 7014 3042 2498 3567 4052 2454 4396 370 1233 202 2079 3398 3984 3537 6254 4097 7866 0 839 5973 374 2019 4569 1996 4669 7970 1085 305 2581 4532 2976 5339 6509 287 1581 7844 1974 2838 5369 - 2654 6630 3254 7800 5035 482 267 2728 2193 6714 2488 1907 3122 3517 1764 3610 1206 399 767 1694 3119 3212 2954 6024 3635 7483 839 0 5427 558 1181 4349 1377 4044 7723 356 653 1744 4218 2241 4614 6121 955 743 7644 1231 2465 4957 - 3625 1204 2446 2778 894 5774 5300 4120 3889 2437 2945 3520 2453 1923 3710 2932 6267 5138 6041 3910 2922 2946 2475 2569 1932 2515 5973 5427 0 5612 4824 2550 4050 1498 3476 5071 5980 4470 2096 3388 1911 1501 5831 4994 3704 4264 3209 1196 - 3115 6814 3581 7859 5141 261 821 3240 2661 6707 2676 2128 3219 3690 2082 4034 726 923 438 1733 3087 3620 3169 5966 3748 7539 374 558 5612 0 1716 4280 1624 4298 7679 735 420 2263 4216 2606 4967 6179 400 1277 7567 1609 2501 5032 - 1574 6001 2441 7408 4611 1659 916 1559 1122 6477 2087 1558 2842 3032 1204 2572 2384 794 1932 1813 3115 2224 2427 5913 3274 7101 2019 1181 4824 1716 0 4330 1180 3346 7545 1023 1808 578 4062 1438 3693 5763 2115 440 7537 763 2404 4603 - 3951 3447 2960 3763 1669 4513 4348 4507 3920 2476 2331 2778 1592 1866 3164 3891 4754 4227 4706 2668 1240 3603 2254 1687 1448 3449 4569 4349 2550 4280 4330 0 3184 2510 3402 4031 4698 4281 533 3245 3612 2187 4339 4265 3296 3576 1941 1381 - 1748 5253 1966 6461 3677 1746 1270 2082 1372 5432 1114 531 1791 2142 497 2525 2335 1117 2027 654 1953 2089 1578 4807 2284 6146 1996 1377 4050 1624 1180 3184 0 2685 6475 1022 1952 1341 2963 1050 3358 4787 1926 1086 6436 422 1244 3619 - 2142 2656 950 4223 1590 4431 3890 2658 2391 3599 1650 2171 1480 838 2287 1590 4991 3724 4711 2694 2175 1496 1148 3384 1164 3938 4669 4044 1498 4298 3346 2510 2685 0 4697 3693 4636 2975 1981 1909 1124 2718 4565 3548 4830 2839 2140 1751 - 6755 3123 5564 1427 3113 7910 7698 7304 6883 1102 
5459 6003 4706 4593 6342 6278 8148 7565 8107 6029 4607 6178 5177 1716 4286 1375 7970 7723 3476 7679 7545 3402 6475 4697 0 7393 8097 7370 3515 6249 5379 2001 7738 7556 461 6829 5267 3013 - 2383 6274 2916 7451 4682 769 332 2512 1927 6376 2132 1552 2772 3161 1419 3313 1452 286 1061 1366 2796 2906 2598 5699 3283 7134 1085 356 5071 735 1023 4031 1022 3693 7393 0 965 1542 3883 1913 4286 5772 1121 600 7322 902 2128 4608 - 3306 7183 3878 8263 5533 207 900 3364 2845 7121 3037 2472 3610 4060 2379 4261 609 1049 132 2130 3501 3861 3521 6384 4136 7944 305 653 5980 420 1808 4698 1952 4636 8097 965 0 2380 4629 2877 5250 6583 570 1380 7986 1866 2904 5432 - 1029 5622 2035 7131 4352 2225 1484 985 611 6284 1958 1538 2721 2788 1134 2033 2949 1348 2503 1991 3119 1719 2194 5787 3086 6831 2581 1744 4470 2263 578 4281 1341 2975 7370 1542 2380 0 3952 1127 3197 5518 2658 1002 7395 951 2429 4380 - 3530 3085 2482 3669 1252 4435 4185 4091 3543 2497 1997 2506 1232 1380 2867 3398 4752 4051 4652 2525 1136 3132 1833 1852 967 3349 4532 4218 2096 4216 4062 533 2963 1981 3515 3883 4629 3952 0 2873 3080 2012 4324 4046 3478 3328 1755 1000 - 825 4564 1027 6011 3227 2681 2049 1319 676 5160 931 791 1656 1663 554 1476 3331 1881 2972 1474 2173 1040 1074 4687 1973 5709 2976 2241 3388 2606 1438 3245 1050 1909 6249 1913 2877 1127 2873 0 2374 4392 2943 1659 6285 1012 1563 3254 - 2188 2756 1395 4638 2426 5053 4415 2544 2590 4318 2513 2912 2550 1932 2885 1241 5687 4248 5344 3542 3268 1479 2054 4285 2285 4397 5339 4614 1911 4967 3693 3612 3358 1124 5379 4286 5250 3197 3080 2374 0 3386 5284 3997 5585 3386 3125 2664 - 4820 1591 3617 1681 1169 6384 6051 5358 4993 937 3701 4277 3001 2736 4569 4287 6746 5903 6617 4455 3136 4211 3340 1272 2507 1363 6509 6121 1501 6179 5763 2187 4787 2718 2001 5772 6583 5518 2012 4392 3386 0 6314 5837 2205 5095 3680 1169 - 3489 7027 3891 7987 5313 550 1219 3632 3039 6795 2923 2403 3403 3915 2405 4390 437 1322 486 1923 3189 3969 3423 6022 3935 7667 287 955 5831 400 2115 4339 1926 4565 
7738 1121 570 2658 4324 2943 5284 6314 0 1676 7603 1964 2662 5184 - 1947 6186 2686 7502 4706 1224 482 1987 1486 6507 2137 1564 2860 3138 1289 2928 1948 355 1501 1641 3029 2553 2541 5892 3331 7190 1581 743 4994 1277 440 4265 1086 3548 7556 600 1380 1002 4046 1659 3997 5837 1676 0 7521 744 2325 4670 - 6835 3472 5661 1877 3241 7805 7635 7391 6934 1268 5459 5983 4697 4647 6338 6419 8005 7508 7989 5957 4527 6290 5213 1629 4312 1798 7844 7644 3704 7567 7537 3296 6436 4830 461 7322 7986 7395 3478 6285 5585 2205 7603 7521 0 6805 5208 3102 - 1542 5461 2023 6758 3962 1670 1054 1785 1112 5773 1394 827 2126 2395 555 2428 2334 887 1962 1071 2355 2012 1801 5178 2589 6446 1974 1231 4264 1609 763 3576 422 2839 6829 902 1866 951 3328 1012 3386 5095 1964 744 6805 0 1644 3928 - 2379 4390 1867 5360 2651 2704 2432 2879 2196 4249 711 892 756 1351 1297 2749 3098 2302 2939 777 711 2336 1077 3581 1284 5041 2838 2465 3209 2501 2404 1941 1244 2140 5267 2128 2904 2429 1755 1563 3125 3680 2662 2325 5208 1644 0 2532 - 3744 2088 2560 2844 304 5230 4884 4296 3876 1914 2534 3109 1836 1592 3406 3337 5618 4736 5469 3302 2042 3189 2190 1639 1340 2528 5369 4957 1196 5032 4603 1381 3619 1751 3013 4608 5432 4380 1000 3254 2664 1169 5184 4670 3102 3928 2532 0 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_d.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_d.txt deleted file mode 100644 index 0464ad3143b4dff3176414a0b343f762ae5379b7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_d.txt +++ /dev/null @@ -1,15 +0,0 @@ - 0 29 82 46 68 52 72 42 51 55 29 74 23 72 46 - 29 0 55 46 42 43 43 23 23 31 41 51 11 52 21 - 82 55 0 68 46 55 23 43 41 29 79 21 64 31 51 - 46 46 68 0 82 15 72 31 62 42 21 51 51 43 64 - 68 42 46 82 0 74 23 52 21 46 82 58 46 65 23 - 52 43 55 15 74 0 61 23 55 31 33 37 51 29 59 - 72 43 23 72 23 61 0 42 23 31 77 37 51 46 33 - 42 23 43 31 52 23 42 0 33 15 37 33 33 31 37 - 51 23 
41 62 21 55 23 33 0 29 62 46 29 51 11 - 55 31 29 42 46 31 31 15 29 0 51 21 41 23 37 - 29 41 79 21 82 33 77 37 62 51 0 65 42 59 61 - 74 51 21 51 58 37 37 33 46 21 65 0 61 11 55 - 23 11 64 51 46 51 51 33 29 41 42 61 0 62 23 - 72 52 31 43 65 29 46 31 51 23 59 11 62 0 59 - 46 21 51 64 23 59 33 37 11 37 61 55 23 59 0 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_s.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_s.txt deleted file mode 100644 index 38afab553d2a9c23c1abda12a95f6367d5d093e2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/p01_s.txt +++ /dev/null @@ -1,16 +0,0 @@ - 1 -13 - 2 -15 - 9 - 5 - 7 - 3 -12 -14 -10 - 8 - 6 - 4 -11 - 1 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/tsp.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/tsp.py deleted file mode 100755 index 0ddff5156497331daffef9b7385a20d63423bbd0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tsp/tsp.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -# -# This is a simple testcase purely for testing the autotuner on permutations -# -# http://en.wikipedia.org/wiki/Travelling_salesman_problem -# - -import adddeps #fix sys.path - -import argparse -import logging - -import opentuner -from opentuner.search.manipulator import (ConfigurationManipulator, - PermutationParameter) -from opentuner.search.objective import MinimizeTime -from opentuner.measurement import MeasurementInterface -from opentuner.measurement.inputmanager import FixedInputManager -from opentuner.tuningrunmain import TuningRunMain - - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) -parser.add_argument('data', help='distance matrix file') - -class TSP(MeasurementInterface): - def __init__(self, args): - super(TSP, self).__init__(args) - data = args.data - m = open(data).readlines() - self.distance 
= [[int(i) for i in l.split()] for l in m] - - def run(self, desired_result, input, limit): - cfg = desired_result.configuration.data - p = cfg[0] # cheating: should use manipulator function - t = self.eval_path(p) - return opentuner.resultsdb.models.Result(time=t) - - def eval_path(self, p): - """ Given permutation of cities as a list of indices, - return total path length """ - out = sum(self.distance[p[i]][p[i+1]] for i in range(len(p)-1)) -## print out, p - return out - - def manipulator(self): - manipulator = ConfigurationManipulator() - manipulator.add_parameter(PermutationParameter(0, range(len(self.distance)))) - return manipulator - - def solution(self): - p = [1,13,2,15,9,5,7,3,12,14,10,8,6,4,11] - return self.eval_path(p) - - - -if __name__ == '__main__': - args = parser.parse_args() - TSP.main(args) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/#accuracy_tuner.py# b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/#accuracy_tuner.py# deleted file mode 100644 index 2110d0d692831e37f30023af05b92a0d91d9623c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/#accuracy_tuner.py# +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} - - -def extractTotalOverhead(file_name): - - 
total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - run_result_call_program = self.call_program(run_cmd) - #print run_result_call_program - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - #Result = opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - 
- if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - return Result - - - def save_final_config(self, configuration): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", configuration.data) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = 
int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner.py deleted file mode 100644 index 5977fe7ee5b4780139d2c5a865c8231361cf0f2c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python -# - -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" -accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} - - -def 
extractTotalOverhead(file_name): - - total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - run_result_call_program = self.call_program(run_cmd) - #print run_result_call_program - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - #Result = opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = 
total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - return Result - - - def save_final_config(self, configuration): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", configuration.data) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - 
print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner_piped.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner_piped.py deleted file mode 100644 index 6d46c5762ead377292337c47d045ee5e58322954..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/accuracy_tuner_piped.py +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import argparse -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.measurement.inputmanager import FixedInputManager -import shutil -import os -import sys -import subprocess -import threading -import psutil - -from measure_confidence import dump_high_confidence_files -from select_top_results import select_top_results -from time import sleep - - -output_dir = "" -flag_ranges = [] -tuning_flags = [] -binary_name = "" 
-accuracy_threshold = 10.0 -opt_confs_index = 9 -evaluated_configs = {} -orig_result_dir = "" - - -def extractTotalOverhead(file_name): - - total_comps = 0.0 - file = open(file_name, "r") - for x in file: - words = x.split() - total_comps += float(words[opt_confs_index]) - - print total_comps - return total_comps - - -def getAccuracy(file_name): - - file = open(file_name, "r") - acc_str = file.read() - file.close() - - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - -def kill(proc_pid): - process = psutil.Process(proc_pid) - for proc in process.children(recursive=True): - proc.kill() - process.kill() - - - -def createFlagsFile(file_name, cfg): - - f = open(file_name, "w+") - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += str(flag_value) + "\n" - - f.write(cmd_config) - f.close() - - -class ClangFlagsTuner(MeasurementInterface): - - def __init__(self, args): - objective = ThresholdAccuracyMinimizeTime(accuracy_threshold) - input_manager = FixedInputManager(size=num_flags) - self.configs_list = [] - - super(ClangFlagsTuner, self).__init__( - args, program_name=args.binary, - program_version=self.file_hash(args.binary), - input_manager=input_manager, objective=objective) - - - FNULL = open(os.devnull, 'wb') - #run_result_call_program = self.call_program(run_cmd) - self.start_process = subprocess.Popen([binary_name, "opentuner_run"]) #, stdout=FNULL); - - try: - os.mkfifo("/tmp/myfifo") - except OSError, e: - print("FIFO exists") - - - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, flag_ranges - # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - )) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - - def run(self, desired_result, input, limit): - 
- """ - Run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - # NOTE: creates the file with flags read by the runtime - createFlagsFile("opentuner_flags", cfg) - - run_cmd = binary_name - print run_cmd - #run_result_call_program = self.call_program(run_cmd) - - # Using Named Pipes to signal execution to the DNN outer thread - fifo = open("/tmp/myfifo", "w") - fifo.write("start_run") - fifo.close() - - print "Waiting for process to signal back - when done processing one run" - - fifo2 = open("/tmp/myfifo", "r") - fifo2.read() - fifo2.close() - - print "Process Signalled back" - - total_comps = extractTotalOverhead("accuracy_summary") - accuracy = getAccuracy("final_accuracy") - - - #Result = opentuner.resultsdb.models.Result(time=total_comps) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - Result.accuracy = accuracy - - if accuracy > accuracy_threshold: - if accuracy not in evaluated_configs: - config_tuple = (total_comps, accuracy, cfg) - self.configs_list.append(config_tuple) - evaluated_configs[accuracy] = 1 - shutil.copy('accuracy_summary', output_dir + '/' + binary_name + '_' + str(accuracy)) - - - print "done with one run" - - return Result - - - def save_final_config(self, configuration): - - print "Dumping High Confidence results" - sleep(5) - - # Only dumping files with 95% confidence - dump_high_confidence_files(binary_name, orig_result_dir, accuracy_threshold, 95) - select_top_results(orig_result_dir + "/high_confidence") - - - #self.start_process.kill() - kill(self.start_process.pid) - - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - print "Final configuration", configuration.data - - return - - - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - createFlagsFile("opentuner_flags", configuration.data) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - - accuracy = getAccuracy("final_accuracy") 
- shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_final_' + str(accuracy) ) - - sorted_list = sorted(self.configs_list, key = lambda tup: tup[0]) - print sorted_list[0:10] - - top_elems = 20 - if len(sorted_list) < top_elems: - top_elems = len(sorted_list) - - - for i in range(top_elems): - createFlagsFile("opentuner_flags", sorted_list[i][2]) - run_cmd = binary_name - run_result_call_program = self.call_program(run_cmd) - accuracy = getAccuracy("final_accuracy") - shutil.copy('accuracy_summary', result_dir + '/' + binary_name + '_' + str(accuracy) + "_rank_" + str(i) ) - - - #os.mkdir(result_dir + "full_results") - - - - -if __name__ == '__main__': - - argparser = argparse.ArgumentParser(parents=opentuner.argparsers()) - argparser.add_argument('--binary', help='name of binary to run') - argparser.add_argument('--num-flags', type=int, help='num of flags to tune for') - argparser.add_argument('--error-range', type=int, help='num of flags to tune for') - argparser.add_argument('--accuracy', type=float, help='accuracy threshold') - argparser.add_argument('--result-dir', help='accuracy threshold') - - - args = argparser.parse_args() - binary_name = str(args.binary) - print("binary_name = ", binary_name) - num_flags = int(args.num_flags) - error_range = int(args.error_range) - accuracy_threshold = float(args.accuracy) - print("accuracy = ", accuracy_threshold) - result_dir = args.result_dir - orig_result_dir = result_dir - if result_dir == "": - print("Provide --result-dir ") - - - output_dir = result_dir + "/full_results" - print output_dir - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - if not os.path.exists(output_dir): - print("Creating output directory = ", output_dir) - os.mkdir(output_dir) - - for j in range(error_range): - flag_ranges.append(j) - - print("flag_ranges = ", flag_ranges) - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - ClangFlagsTuner.main(argparser.parse_args()) diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/adddeps.py deleted file mode 100644 index 72de04cf55e138a5ee5d0fdaf11da4b692045706..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/adddeps.py +++ /dev/null @@ -1,5 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/gettingstarted.md b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/gettingstarted.md deleted file mode 100644 index 8a442c5f44d6c501f686125d4468ca642f745920..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/gettingstarted.md +++ /dev/null @@ -1,215 +0,0 @@ ---- -layout: default -title: OpenTuner - Using OpenTuner -permalink: /tutorial/gettingstarted/index.html ---- - -Tutorial: Optimizing Block Matrix Multiplication -================================================ - -This tutorial assumes that you have checked out a copy of opentuner. For -guidelines on how to get opentuner set up, refer [here][setup]. - -[setup]: http://opentuner.org/tutorial/setup/ - -Identifying a Program to Autotune ---------------------------------- - -In order to do autotuning, you first need something to autotune. This will -normally be your own program that you want to make either fast or better in -some way. For this tutorial we will use a blocked version of matrix multiply -as an example. We will use opentuner to find the optimal value of the block -size parameter. 
- -We will autotune the sample code below(based off of modification of code -found [here][matrix-multiply-code]), making sure to take the block size as -a compile time constant to the program. - -[matrix-multiply-code]: http://csapp.cs.cmu.edu/public/waside/waside-blocking.pdf - -Save the sample code below to examples/tutorials/mmm_block.cpp - - #include <stdio.h> - #include <cstdlib> - - #define N 100 - - int main(int argc, const char** argv) - { - - int n = BLOCK_SIZE * (N/BLOCK_SIZE); - int a[N][N]; - int b[N][N]; - int c[N][N]; - int sum=0; - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int j1=0;j1<n;j1+=BLOCK_SIZE) - { - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int i=0;i<n;i++) - { - for(int j=j1;j<j1+BLOCK_SIZE;j++) - { - sum = c[i][j]; - for(int k=k1;k<k1+BLOCK_SIZE;k++) - { - sum += a[i][k] * b[k][j]; - } - c[i][j] = sum; - } - } - } - } - } - return 0; - } - -Creating a New Autotuner with Opentuner ------------------------------------- -Now we need to create a program that uses OpenTuner to optimize the program we just saved. 
- -Save the following code to examples/tutorials/mmm_tuner.py - - #!/usr/bin/env python - # - # Optimize blocksize of apps/mmm_block.cpp - # - # This is an extremely simplified version meant only for tutorials - # - import adddeps # fix sys.path - - import opentuner - from opentuner import ConfigurationManipulator - from opentuner import IntegerParameter - from opentuner import MeasurementInterface - from opentuner import Result - - - class GccFlagsTuner(MeasurementInterface): - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - manipulator.add_parameter( - IntegerParameter('blockSize', 1, 10)) - return manipulator - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - gcc_cmd = 'g++ mmm_block.cpp ' - gcc_cmd += '-DBLOCK_SIZE='+ cfg['blockSize'] - gcc_cmd += ' -o ./tmp.bin' - - compile_result = self.call_program(gcc_cmd) - assert compile_result['returncode'] == 0 - - run_cmd = './tmp.bin' - - run_result = self.call_program(run_cmd) - assert run_result['returncode'] == 0 - - return Result(time=run_result['time']) - - def save_final_config(self, configuration): - """called at the end of tuning""" - print "Optimal block size written to mmm_final_config.json:", configuration.data - self.manipulator().save_to_file(configuration.data, - 'mmm_final_config.json') - - - if __name__ == '__main__': - argparser = opentuner.default_argparser() - GccFlagsTuner.main(argparser.parse_args()) - - -This file consists of several components, each of which will be discussed in further detail below. - -Tuning Programs have a general structure as follows: - - from opentuner import MeasurementInterface - from opentuner import Result - -Create an instance of class GccFlagsTuner, which tunes specified parameters using opentuner. 
- class GccFlagsTuner(MeasurementInterface): - -The manipulator method defines the variable search space by specifying parameters that should be tuned by this instance of GccFlagsTuner - - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - manipulator.add_parameter( - IntegerParameter('blockSize', 1, 10)) - return manipulator - -The run method actually runs opentuner under the given configuration and returns the calculated performance under this configuration. In this example, the blockSize parameter to be tuned is input as a compile-time constant that takes on a value within the specified range each time it is run. However, opentuner also supports other methods of specifying these parameters that may be preferred in different use cases. - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = desired_result.configuration.data - - gcc_cmd = 'g++ mmm_block.cpp ' - gcc_cmd += '-DBLOCK_SIZE='+ cfg['blockSize'] - gcc_cmd += ' -o ./tmp.bin' - - compile_result = self.call_program(gcc_cmd) - assert compile_result['returncode'] == 0 - - run_cmd = './tmp.bin' - - run_result = self.call_program(run_cmd) - assert run_result['returncode'] == 0 - - return Result(time=run_result['time']) - -We can actually display the result of running opentuner(the optimal block size for our multiplication problem) by creating a method, save_final_config() in our class. 
This saves a json dictionary of the optimal blockSize parameter found to the file mmm_final_config.json - - def save_final_config(self, configuration): - """called at the end of tuning""" - print "Optimal block size written to mmm_final_config.json:", configuration.data - self.manipulator().save_to_file(configuration.data, - 'mmm_final_config.json') - - if __name__ == '__main__': - argparser = opentuner.default_argparser() - GccFlagsTuner.main(argparser.parse_args()) - -Generating and Viewing Results ------------------------------- - -Run the following command to autotune our program(The --no-dups flag hides warnings about duplicate results and the --stop-after parameter specifies that we are running opentuner for a maximum of 30 seconds): - - python mmm_tuner.py --no-dups --stop-after=30 - -The results of each run configuration will be displayed as follows(output lines are truncated for readability here): - - [ 10s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 19s] INFO opentuner.search.metatechniques: AUCBanditMetaTechniqueA: [('DifferentialEvolutionAlt', 477), ('UniformGreedyMutation', 18), ('NormalGreedyMutation', 5), ('RandomNelderMead', 1)] - [ 20s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 30s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] - [ 30s] INFO opentuner.search.plugin.DisplayPlugin: tests=10, best {'BLOCK_SIZE': 4}, cost time=0.0081, found by DifferentialEvolutionAlt[...] 
- Optimal block size written to mmm_final_config.json: {'BLOCK_SIZE': 4} - - -Look up the optimal BlockSize value by inspecting the following created file: - - mmm_final_config.json - -In this example, the output file content was as follows: - - {'BLOCK_SIZE': 4} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/measure_confidence.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/measure_confidence.py deleted file mode 100644 index 655bdb024f72f0fd47807b5aa2696f9fb89b40e6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/measure_confidence.py +++ /dev/null @@ -1,164 +0,0 @@ - -import argparse -import os -import sys - - -def getAccuracy(file_name): - - if not os.path.exists(file_name): - print("final_accuracy file not found ") - sys.exit(0) - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -total_runs = 100.0 -skip_lines = 0 - - -def test_func(): - print "test_func" - sys.exit(0) - - -def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): - - #total_runs = 100.0 - successful_runs = 0.0 - total_acc = 0 - - for i in range(int(total_runs)): - - fifo = open("/tmp/myfifo", "w") - fifo.write("start_run") - fifo.close() - - print "Waiting for process to signal back - when done processing one run" - - fifo2 = open("/tmp/myfifo", "r") - fifo2.read() - fifo2.close() - - print "Process Signalled back" - - accuracy = getAccuracy("final_accuracy") - total_acc += accuracy - - if accuracy > accuracy_threshold: - successful_runs += 1 - - confidence = (successful_runs / total_runs) * 100.0 - print("confidence = ", confidence) - avg_acc = total_acc / total_runs - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - -def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not 
os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - index = 0 - results_str = "" - for x in f: - if index >= skip_lines: - error_knob = int(float(x.split()[1])) - print error_knob - tuner_file.write(str(error_knob) + "\n") - - results_str += x - index += 1 - - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence) - - if run_confidence > 90: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(results_str) - f2.close() - - conf_result = (run_confidence, avg_accuracy, file_name) - confidence_list.append(conf_result) - - return confidence_list - - - -def dump_high_confidence_files(binary, result_dir, accuracy, confidence): - - #result_dir = args.result_dir - output_dir = result_dir + "/high_confidence" - result_dir = result_dir + "/full_results" - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - print "Dumped Confidence Summary" - - - - - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - 
argparser.add_argument('--output-dir', help='Directory for storing output directory') - argparser.add_argument('--binary', help='Binary name to run') - argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') - argparser.add_argument('--confidence', type=float, help='Confidence threshold') - - - args = argparser.parse_args() - result_dir = args.result_dir - output_dir = args.output_dir - binary = args.binary - accuracy = args.accuracy - confidence = args.confidence - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - #print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_block.cpp b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_block.cpp deleted file mode 100755 index 0bb76845f8d6653d1c90a0a5b387e75c46e18233..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_block.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include <stdio.h> -#include <cstdlib> - -#define N 100 - -int main(int argc, const char** argv) -{ - - int n = BLOCK_SIZE * (N/BLOCK_SIZE); - int a[N][N]; - int b[N][N]; - int c[N][N]; - int sum=0; - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int j1=0;j1<n;j1+=BLOCK_SIZE) - { - for(int k1=0;k1<n;k1+=BLOCK_SIZE) - { - for(int i=0;i<n;i++) - { - for(int j=j1;j<j1+BLOCK_SIZE;j++) - { - sum = c[i][j]; - for(int k=k1;k<k1+BLOCK_SIZE;k++) - { - sum += a[i][k] * b[k][j]; - } - c[i][j] = sum; - } - } - } - } - } - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_tuner.py 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_tuner.py deleted file mode 100644 index f92c4c3bfc9640514e4879b1e46480613015c207..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/mmm_tuner.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# -# Optimize blocksize of apps/mmm_block.cpp -# -# This is an extremely simplified version meant only for tutorials -# -import adddeps # fix sys.path - -import opentuner -from opentuner import ConfigurationManipulator -from opentuner import MeasurementInterface -from opentuner import Result -from opentuner import EnumParameter -import os -import sys - - -tuning_flags = [] - -binary_name = "" - - -class ClangFlagsTuner(MeasurementInterface): - def manipulator(self): - """ - Define the search space by creating a - ConfigurationManipulator - """ - manipulator = ConfigurationManipulator() - for flag in tuning_flags: - manipulator.add_parameter( - EnumParameter(flag, - [0, 1, 2, 3, 4, 5, 6])) #default is needed, optimizations don't work without it(tried and tested) - return manipulator - - def compile(self, cfg, id): - """ - Compile a given configuration in parallel - """ - cmd_config = "" - for flag in tuning_flags: - flag_value = cfg[flag] - cmd_config += " " + flag_value - - run_cmd = binary_name + cmd_config - return self.call_program(run_cmd) - - def run_precompiled(self, desired_result, input, limit, compile_result, id): - """ - Run a compile_result from compile() sequentially and return performance - """ - run_result_call_program = self.call_program(binary_filename.format(id)) - run_result_getFileSize = self.getFileSize(output_filename) - self.store_size_list(run_result_getFileSize) - return Result(size=run_result_getFileSize['binary_size'],time=run_result_call_program['time']) - - def run(self, desired_result, input, limit): - """ - Compile and run a given configuration then - return performance - """ - cfg = 
desired_result.configuration.data - self.store_config_list(cfg) - compile_result = self.compile(cfg, 0) - return self.run_precompiled(desired_result, input, limit, compile_result, 0) - - list_size = [] # list of file sizes - list_config = [] #list of configurations - list_size_config = [] #list of file size with corresponding optimization - list_N_size_config=[] - - def store_size_list(self, binary_size): - """stores file size in a list""" - self.list_size.append(binary_size) - - def store_config_list(self,cfg): - """stores configurations in a list""" - self.list_config.append(cfg) - - counter = 0 - def save_final_config(self,configuration): - """saves list of file size with corresponding optimization in a file""" - for list in self.list_size: - dict_size_config = {self.list_size[self.counter]['binary_size']: self.list_config[self.counter]} - self.list_size_config.append(dict_size_config) - self.list_size_config.sort() - self.counter += 1 - self.extract_topN_resuls(10) - print "ALL file sizes along with corresponding configurations writtent to size_config.json" - self.manipulator().save_to_file(self.list_size_config, - 'size_config.json') - - def extract_topN_resuls(self,N): - """extracts top N results w.r.t size,N currently set to 10""" - counter=0 - for list in self.list_size_config: - if counter < N: - self.list_N_size_config.append(list) - print "Top "+str(N)+" file sizes along with corresponding configurations writtent to TopN_size_config.json" - self.manipulator().save_to_file(self.list_size_config, - 'TopN_size_config.json') - - - -if __name__ == '__main__': - - binary_name = sys.argv[1] - num_flags = int(sys.argv[2]) - - for i in range(num_flags): - tuning_flags.append("flag" + str(i)) - - print tuning_flags - - argparser = opentuner.default_argparser() - ClangFlagsTuner.main(argparser.parse_args()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/select_top_results.py 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/select_top_results.py deleted file mode 100644 index 7ee878e5f8f84f3f56ea982c1f933b2c1a5b914b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/tutorials/select_top_results.py +++ /dev/null @@ -1,101 +0,0 @@ - - -import argparse -import sys -import os - - -log_index = 9 -linear_index = 10 -quad_index = 11 - -top_k = 10 -skip_lines = 1 - - -def dump_results(sorted_list, k, result_dir, sub_dir): - - ref_dir = result_dir + "/" + sub_dir - if not os.path.exists(ref_dir): - os.mkdir(ref_dir) - - for i in range(min(k, len(sorted_list)) ): - file_name = sorted_list[i][1] - file_name = ref_dir + "/" + file_name + "_rank_" + str(i) - f = open(file_name, "w+") - f.write(str(sorted_list[i][2]) + "\t") - f.write(str(sorted_list[i][3]) + "\t") - f.write(str(sorted_list[i][4]) + "\n") - f.write(sorted_list[i][0]) - f.close() - - - - -def select_top_results(result_dir): - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - results_arr = [] - - for file_name in file_names: - - if file_name == "confidence_summary.txt": - continue - - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - log_result = 0.0 - linear_result = 0.0 - quad_result = 0.0 - file_str = "" - - index = 0 - f = open(result_dir + "/" + file_name) - for x in f: - if index >= skip_lines: - words = x.split() - log_result += float(words[log_index]) - linear_result += float(words[linear_index]) - quad_result += float(words[quad_index]) - file_str += x - - index += 1 - - - file_result = (file_str, file_name, log_result, linear_result, quad_result) - results_arr.append(file_result) - - - sorted_list = sorted(results_arr, key = lambda tup: tup[2]) - dump_results(sorted_list, top_k, result_dir, "log") - - sorted_list = sorted(results_arr, key = lambda tup: tup[3]) - 
dump_results(sorted_list, top_k, result_dir, "linear") - - sorted_list = sorted(results_arr, key = lambda tup: tup[4]) - dump_results(sorted_list, top_k, result_dir, "quad") - - -#def select_top_configuration(result_dir): - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - - args = argparser.parse_args() - result_dir = args.result_dir - - select_top_results(result_dir) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/adddeps.py deleted file mode 100644 index ede22a8fcdb2a94db7915ff3beb90894b2cb8592..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/adddeps.py +++ /dev/null @@ -1,6 +0,0 @@ -# we would prefer a symbolic link, but it does not work on windows -import os -target = os.path.join(os.path.dirname(__file__), - '../../opentuner/utils/adddeps.py') -execfile(target, dict(__file__=target)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/cla_func.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/cla_func.py deleted file mode 100644 index f4787a2f23f175457ee527f8569dca39bf450605..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/cla_func.py +++ /dev/null @@ -1,144 +0,0 @@ -import numpy as np -import math - - -class Op: - def __init__(self): - self.M = [] - self.name = []; - self.mutation_partners = []; - self.anti_operator = []; - - self.mutation_partners_no = [] - self.anti_operator_no = [] - - # create all operators - self.create_operators() - - # check unitarity of all operators - self.check_unitarity() - - # determine the indices of the mutation partners - 
self.determine_index_of_mutation_partners() - - # determine the indices of the anti-operators - self.determine_index_of_anti_operators() - - - def create_operators(self): - - # example with +Z - #self.M.append(np.matrix([[1.0, 2.0], [2.0+2.0j, 3.0]])) - # watch out: python needs 1.0 instead of just 1 to assume float variables - #self.name.append('asd'); - #self.mutation_partners.append(['+z','+w']); - #self.anti_operator.append('+w'); - - # Operators - alpha = math.pi / 3.0; - da = math.pi / 10.0; - - # operator 1 +z - self.M.append(np.matrix( - [[math.cos(da / 2.0) - 1j * math.sin(da / 2.0), 0.0], - [0.0, math.cos(da / 2.0) + 1j * math.sin(da / 2.0)]])) - self.name.append('+z'); - self.mutation_partners.append(['-z', '+w', '-w']); - self.anti_operator.append('-z'); - - # operator 2 -z - self.M.append(np.matrix( - [[math.cos(-da / 2.0) - 1j * math.sin(-da / 2.0), 0.0], - [0.0, math.cos(-da / 2.0) + 1j * math.sin(-da / 2.0)]])) - self.name.append('-z'); - self.mutation_partners.append(['+z', '+w', '-w']); - self.anti_operator.append('+z'); - - # operator 3 +w - self.M.append(np.matrix([ - [math.cos(da / 2.0) - 1j * math.cos(alpha) * math.sin(da / 2.0), - -math.sin(alpha) * math.sin(da / 2.0)], - [math.sin(alpha) * math.sin(da / 2.0), - math.cos(da / 2.0) + 1j * math.cos(alpha) * math.sin(da / 2.0)]])) - self.name.append('+w'); - self.mutation_partners.append(['+z', '-z', '-w']); - self.anti_operator.append('-w'); - - # operator 4 -w - self.M.append(np.matrix([ - [math.cos(-da / 2.0) - 1j * math.cos(alpha) * math.sin(-da / 2.0), - -math.sin(alpha) * math.sin(-da / 2.0)], - [math.sin(alpha) * math.sin(-da / 2.0), - math.cos(-da / 2.0) + 1j * math.cos(alpha) * math.sin(-da / 2.0)]])) - self.name.append('-w'); - self.mutation_partners.append(['+z', '-z', '+w']); - self.anti_operator.append('+w'); - - - def check_unitarity(self): - # this function checks if all defined operators are unitary - # in case one isn't unitary the program stops - for k in range(len(self.M)): - 
if (np.trace(self.M[k] * self.M[k].getH()) - 2 != 0): - print "Operator " + self.name[k] + " (no. " + str( - k) + ") isn't unitary!" - exit() - - def determine_index_of_mutation_partners(self): - # create a field for each operator with an array of possible other gates for the mutation step - for k in range(len(self.M)): - hlp = [] - for m in range(len(self.mutation_partners[k])): - # go through all possible partners and find them among the operators - for n in range(len(self.M)): - if self.mutation_partners[k][m] is self.name[n]: - hlp.append(n) - self.mutation_partners_no.append(hlp) - - def determine_index_of_anti_operators(self): - # determine the Anti operator index - for k in range(len(self.M)): - found_operator = False - for n in range(len(self.M)): - # go through all possible partners and find them among the operators - if self.anti_operator[k] is self.name[n]: - self.anti_operator_no.append(n); - found_operator = True - - if found_operator == False: - print "Couldn't find the anti-operator for operator " + self.name[ - k] + " (no " + str(k) + ")" - - def __str__(self): - # just a test to play around - hlpstr = '' - for k in range(len(self.M)): - hlpstr = hlpstr + self.name[k] + " " + str( - self.anti_operator_no[k]) + "\n" - - return "Operator Class:\n" + hlpstr - - -def calc_fidelity(sequence, Op, Ugoal): - # Op will be function that return operator matrix - # Ugoal 2x2 unitary matrix - # sequence = [1 2 3 4]; - # return = fidelity - - # example: - # sequence = [1 4 2 4 5]; - # Uapprox = Op(1) * Op(4) * Op(2) * Op(4) * Op(5); - - # create identity matrix - Uapprox = np.eye(len(Ugoal)) - - for k in range(len(sequence)): - Uapprox = Op.M[sequence[k]] * Uapprox - - # M.getH() returns the complex conjugate of self - result = (1.0 / len(Ugoal)) * abs(np.trace(Ugoal * Uapprox.getH())) - - return result - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/input_generator.py 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/input_generator.py deleted file mode 100644 index 009af836f435d013050ff877c4cd66d86019edfc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/input_generator.py +++ /dev/null @@ -1,100 +0,0 @@ -import numpy as np -import math -import random - - -def generate_random_Ugoal_HARD(N, **kwargs): - # N is the length of random matrix multiplication yielding Ugoal - # N ~ 100 should be enough - # This method is hard because it creates Ugoal over the whole space - # Ugoal 2x2 unitary matrix - - # create identity matrix - Ugoal = np.eye(2) - - # create all N random angles in 2*pi*[0,1) - seq_angle = 2.0 * math.pi * np.random.rand(1, N) - - # determine random operator - help2 = np.random.randint(3, size=(1, N)) - - for k in range(N): - hlp = seq_angle[0][k]; - if help2[0][k] == 0: - Ugoal = X_Mat(hlp) * Ugoal - elif help2[0][k] == 1: - Ugoal = Y_Mat(hlp) * Ugoal - else: - Ugoal = Z_Mat(hlp) * Ugoal - - return Ugoal - - -def generate_random_Ugoal_EASY(N, alpha): - # N is the length of random matrix multiplication yielding Ugoal - # N ~ 100 should be enough - # alpha is the used angle between rotation axes - # This method is easy because it creates Ugoal over the whole space - # Ugoal 2x2 unitary matrix - - # create identity matrix - Ugoal = np.eye(2) - - # create all N random angles in 2*pi*[0,1) - seq_angle = 2.0 * math.pi * np.random.rand(1, N) - - # determine random operator - help2 = np.random.randint(2, size=(1, N)) - - for k in range(N): - hlp = seq_angle[0][k]; - if help2[0][k] == 0: - Ugoal = Z_Mat(hlp) * Ugoal - else: - Ugoal = W_Mat(hlp, alpha) * Ugoal - - return Ugoal - - -def generate_random_Ugoal_RANDOM(**kwargs): - # Random guess with the following parametrization for U - # U = @(q1, q2, q3) [ - # [ cos(q1)*exp( i*q2 ), sin(q1)*exp( i*q3 )]; - # [-sin(q1)*exp(-i*q3 ), cos(q1)*exp(-i*q2 )] - # ]; - - # create random angles - q1 = 
random.uniform(0.0, 0.5 * math.pi) - q2 = random.uniform(0.0, 2.0 * math.pi) - q3 = random.uniform(0.0, 2.0 * math.pi) - - return np.matrix([ - [math.cos(q1) * my_cexp(q2), math.sin(q1) * my_cexp(q3)], - [-math.sin(q1) * my_cexp(-q3), math.cos(q1) * my_cexp(-q2)]]) - - -def my_cexp(x): - return math.cos(x) + 1j * math.sin(x) - - -def X_Mat(a): - return np.matrix([[math.cos(a / 2.0), -1j * math.sin(a / 2.0)], - [-1j * math.sin(a / 2.0), math.cos(a / 2.0)]]) - - -def Y_Mat(a): - return np.matrix([[math.cos(a / 2.0), -math.sin(a / 2.0)], - [math.sin(a / 2.0), math.cos(a / 2.0)]]) - - -def Z_Mat(a): - return np.matrix([[math.cos(-a / 2.0) + 1j * math.sin(-a / 2.0), 0], - [0, math.cos(a / 2.0) + 1j * math.sin(a / 2.0)]]) - - -def W_Mat(a, alpha): - return np.matrix([[math.cos(a / 2) - 1j * math.cos(alpha) * math.sin(a / 2.0), - -math.sin(a / 2.0) * math.sin(alpha)], - [math.sin(a / 2.0) * math.sin(alpha), - math.cos(a / 2.0) + 1j * math.cos(alpha) * math.sin( - a / 2.0)]]) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/problem_description.pdf b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/problem_description.pdf deleted file mode 100644 index e8d09de95a8a6416bf88f10a4d6e4a0fca92670d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/problem_description.pdf and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/testwrapper.sh b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/testwrapper.sh deleted file mode 100755 index e08593a8764b81a8e68380f9d46753c7a73859c0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/testwrapper.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -COUNT=50 -for Z in `seq $COUNT` -do - for T in `./unitary.py --list-techniques $@`; - do - echo $Z/$COUNT $T - ./unitary.py --technique=$T $@ - done -done - 
diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/unitary.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/unitary.py deleted file mode 100755 index cfa5fe114155f9a7efbd25d191d520846e3d4017..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/examples/unitary/unitary.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -# -# This is a quantum control example motivated by the experimental need -# to synthesize unitary matrices in SU(2) in optimal time, given an -# explicit and finite control set generating the whole space, and an -# admissible error. -# -# See problem_description.pdf for additional details. -# -# Contributed by Clarice D. Aiello <clarice@mit.edu> -# - -import adddeps # fix sys.path - -import argparse -import logging -import math -import random -import sys - -try: - import numpy as np -except: - print >> sys.stderr, ''' - -ERROR: import numpy failed, please install numpy - -Possible things to try: - ../../venv/bin/pip install numpy - ../../venv/bin/easy_install numpy - sudo apt-get install python-numpy - -''' - raise - -import opentuner - -from math import sqrt -import cla_func -from input_generator import (generate_random_Ugoal_HARD, - generate_random_Ugoal_EASY, - generate_random_Ugoal_RANDOM) - -from opentuner.search.manipulator import (ConfigurationManipulator, - SwitchParameter, - IntegerParameter, - FloatParameter) - - -def generate_random_Ugoal_FIXED(**kwargs): - Ag = -1 / sqrt(10); - Bg = sqrt(2) / sqrt(10); - Cg = -sqrt(3) / sqrt(10); - Dg = -sqrt(4) / sqrt(10); - return cla_func.np.matrix( - [[Ag + Cg * 1j, Bg + Dg * 1j], [-Bg + Dg * 1j, Ag - Cg * 1j]]) - - -log = logging.getLogger(__name__) - -generators = { - 'hard': generate_random_Ugoal_HARD, - 'easy': generate_random_Ugoal_EASY, - 'random': generate_random_Ugoal_RANDOM, - 'fixed': generate_random_Ugoal_FIXED, -} - -parser = argparse.ArgumentParser(parents=opentuner.argparsers()) 
-parser.add_argument('--seq-len', type=int, default=10, - help='maximum length for generated sequence') -parser.add_argument('--goal-type', choices=generators.keys(), default='hard', - help='method used to generate goal') -parser.add_argument('--goal-n', type=int, default=100, - help='argument to ugoal generator') -parser.add_argument('--goal-alpha', type=float, - default=random.random() * math.pi, - help='argument to ugoal generator') - - -class Unitary(opentuner.measurement.MeasurementInterface): - def __init__(self, *pargs, **kwargs): - super(Unitary, self).__init__(*pargs, **kwargs) - - self.op = cla_func.Op() - self.num_operators = len(self.op.M) - self.Ugoal = generators[args.goal_type](N=args.goal_n, - alpha=args.goal_alpha) - - - def run(self, desired_result, input, limit): - cfg = desired_result.configuration.data - - sequence = [cfg[i] for i in xrange(self.args.seq_len) - if cfg[i] < self.num_operators] - # sequence can be shorter than self.args.seq_len with null operator - - if len(sequence) > 0: - accuracy = cla_func.calc_fidelity(sequence, self.op, self.Ugoal) - # ~.99 is acceptable - else: - accuracy = 0.0 - - return opentuner.resultsdb.models.Result(time=0.0, - accuracy=accuracy, - size=len(sequence)) - - def manipulator(self): - manipulator = ConfigurationManipulator() - for d in xrange(self.args.seq_len): - # we add 1 to num_operators allow a ignored 'null' operator - manipulator.add_parameter(SwitchParameter(d, self.num_operators + 1)) - return manipulator - - def save_final_config(self, configuration): - ''' - called at the end of autotuning with the best resultsdb.models.Configuration - ''' - cfg = configuration.data - sequence = [cfg[i] for i in xrange(self.args.seq_len) - if cfg[i] < self.num_operators] - print "Final sequence", sequence - - def objective(self): - # we could have also chosen to store 1.0 - accuracy in the time field - # and use the default MinimizeTime() objective - return 
opentuner.search.objective.MaximizeAccuracyMinimizeSize() - - -if __name__ == '__main__': - args = parser.parse_args() - Unitary.main(args) - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/gen-venv-bootstrap.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/gen-venv-bootstrap.py deleted file mode 100755 index ff159bb1080e7f3f0979e4b60f4d41eea5c9d1e9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/gen-venv-bootstrap.py +++ /dev/null @@ -1,39 +0,0 @@ -#!./venv/bin/python - -extra = ''' - -default_target_dir = 'venv' - -pip_install_packages = filter(len, open('requirements.txt').readlines()) - -import os -import subprocess -import sys - -def adjust_options(options, args): - if len(args)==0: - os.chdir(os.path.dirname(__file__)) - args.append(default_target_dir) - -def after_install(options, home_dir): - from os.path import join - pip = join(home_dir, 'bin/pip') - if not os.path.exists(pip): - # on windows - pip = join(home_dir, 'Scripts/pip.exe') - if not os.path.exists(pip): - print "error", pip, "is missing" - if sys.version_info < (2, 7): - subprocess.call([pip, 'install', 'importlib']) - for prog in pip_install_packages: - subprocess.call([pip, 'install', prog]) - -''' - -import os -import virtualenv - -os.chdir(os.path.dirname(__file__)) -output = virtualenv.create_bootstrap_script(extra) -f = open('venv-bootstrap.py', 'w').write(output) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/install_reqs.sh b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/install_reqs.sh deleted file mode 100644 index e671a5f2a1619f7960fa7471774aa94cab3e0bd6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/install_reqs.sh +++ /dev/null @@ -1,3 +0,0 @@ -pip2 install sqlalchemy -pip2 install psutil -pip2 install opentuner diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.gnuplot 
b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.gnuplot deleted file mode 100644 index 1d4f13021303b0df3c2821eac3935524f494e18f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.gnuplot +++ /dev/null @@ -1,10 +0,0 @@ - -set terminal x11 -set xlabel "Autotuning Seconds" -set ylabel "Runtime Seconds" -set xrange [0:600] - -plot "/tmp/livedisplay.dat" u 1:2 w lp lw 3 title "Best Execution Time", \ - "/tmp/livedisplaydetails.dat" w p lw 2 title "Tests (excluding timeouts)" - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.py deleted file mode 100755 index 5aa3d552d8e5506236d9e004c1f66370b7f19a23..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/misc/livedisplay.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python -import os -import argparse -import subprocess -import time - -parser = argparse.ArgumentParser() -parser.add_argument('--gnuplot-filename', default='livedisplay.gnuplot') -parser.add_argument('--data', default='/tmp/livedisplay.dat') -parser.add_argument('--details', default='/tmp/livedisplaydetails.dat') -parser.add_argument('--xrange', type=float, default=300) -parser.add_argument('--yrange', type=float, default=.05) -parser.add_argument('--yrange2', type=float, default=1.0) -parser.add_argument('--remote') -args = parser.parse_args() - -if args.remote: - if os.path.exists(args.data): - os.unlink(args.data) - if os.path.exists(args.details): - os.unlink(args.details) - syncproc = subprocess.Popen( - ["ssh", args.remote, "tail -f -n10000 " + args.data], - stdout=open(args.data, "w")) - syncproc2 = subprocess.Popen( - ["ssh", args.remote, "tail -f -n10000 " + args.details], - stdout=open(args.details, "w")) - -while '\n' not in open(args.data).read(): - time.sleep(1) -while '\n' not in open(args.details).read(): - 
time.sleep(1) - -p1 = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE) -p1.stdin.write(open(args.gnuplot_filename).read()) -print >> p1.stdin, 'set title "Zoomed out"' -print >> p1.stdin, "set xrange [0:%f]" % args.xrange -print >> p1.stdin, "set yrange [0:%f]" % args.yrange2 -p1.stdin.flush() - -time.sleep(1) - -p2 = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE) -p2.stdin.write(open(args.gnuplot_filename).read()) -print >> p2.stdin, 'set title "Zoomed in"' -print >> p2.stdin, "set xrange [0:%f]" % args.xrange -print >> p2.stdin, "set yrange [0:%f]" % args.yrange -p2.stdin.flush() - -procs = [p1, p2] - -while True: - time.sleep(1) - for p in procs: - print >> p.stdin, "replot" - p.stdin.flush() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/__init__.py deleted file mode 100644 index 09a5dead02d214f4dce641069d7be66c124f278a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ - -import measurement -import resultsdb -import search -import tuningrunmain -from opentuner.measurement import MeasurementInterface -from opentuner.resultsdb.models import Configuration -from opentuner.resultsdb.models import DesiredResult -from opentuner.resultsdb.models import Result -from opentuner.resultsdb.models import TuningRun -from opentuner.search.manipulator import ConfigurationManipulator -from opentuner.search.manipulator import EnumParameter -from opentuner.search.manipulator import FloatParameter -from opentuner.search.manipulator import IntegerParameter -from opentuner.search.manipulator import LogFloatParameter -from opentuner.search.manipulator import LogIntegerParameter -from opentuner.search.manipulator import PermutationParameter -from opentuner.search.manipulator import ScheduleParameter -from opentuner.search.manipulator import SwitchParameter -from 
opentuner.tuningrunmain import init_logging - - -def argparsers(): - """ - return a list of ArguementParser to be used as parents to the user's - """ - return [ - measurement.driver.argparser, - measurement.interface.argparser, - search.driver.argparser, - search.plugin.argparser, - search.technique.argparser, - #stats.argparser, - tuningrunmain.argparser, - ] - - -def default_argparser(): - import argparse - return argparse.ArgumentParser(parents=argparsers()) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/api.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/api.py deleted file mode 100644 index 19a2f60935d7a700771778f0a1304f5ff5cbea6d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/api.py +++ /dev/null @@ -1,87 +0,0 @@ -from datetime import datetime -from opentuner import tuningrunmain - - -class TuningRunManager(tuningrunmain.TuningRunMain): - """ - This class manages a tuning run in a "slave" configuration, where main() - is controlled by an another program. - """ - def __init__(self, measurement_interface, args, **kwargs): - super(TuningRunManager, self).__init__(measurement_interface, args, **kwargs) - self.init() - self.tuning_run.state = 'RUNNING' - self.commit(force=True) - self.search_driver.external_main_begin() - - def get_next_desired_result(self): - """ - Returns a opentuner.resultsdb.DesiredResult that should be tested next. - """ - dr = self.measurement_driver.query_pending_desired_results().first() - if dr is None: - self.search_driver.external_main_generation() - dr = self.measurement_driver.query_pending_desired_results().first() - if dr is None: - return None - self.measurement_driver.claim_desired_result(dr) - dr.limit = self.measurement_driver.run_time_limit(dr) - return dr - - def get_desired_results(self): - """ - Returns a list of all opentuner.resultsdb.DesiredResult that should be tested next. 
- """ - drs = self.measurement_driver.query_pending_desired_results().all() - if len(drs) == 0: - self.search_driver.external_main_generation() - drs = self.measurement_driver.query_pending_desired_results().all() - if len(drs) == 0: - return [] - for dr in drs: - self.measurement_driver.claim_desired_result(dr) - dr.limit = self.measurement_driver.run_time_limit(dr) - - return drs - - def report_result(self, desired_result, result, result_input=None): - """ - Report a measured result. desired_result should have been returned by - get_next_desired_result(). - """ - self.measurement_driver.report_result(desired_result, result, result_input) - - def get_best_configuration(self): - """ - The best configuration found so far. From the current tuning run only. - """ - try: - return self.search_driver.best_result.configuration.data - except AttributeError: - return None - - def get_best_result(self): - """ - The best result found so far. From the current tuning run only. - """ - try: - return self.search_driver.best_result - except AttributeError: - return None - - def finish(self): - """ - Called at the end of the tuning process to call hooks and close database - connections. 
- """ - self.search_driver.external_main_end() - self.measurement_interface.save_final_config( - self.search_driver.best_result.configuration) - self.tuning_run.final_config = self.search_driver.best_result.configuration - self.tuning_run.state = 'COMPLETE' - self.tuning_run.end_date = datetime.now() - self.commit(force=True) - self.session.close() - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/driverbase.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/driverbase.py deleted file mode 100644 index 5486889c0dcedd4342a9cb463aa0d5047f3c0932..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/driverbase.py +++ /dev/null @@ -1,48 +0,0 @@ -from opentuner.resultsdb.models import * - - -class DriverBase(object): - """ - shared base class between MeasurementDriver and SearchDriver - """ - - def __init__(self, - session, - tuning_run, - objective, - tuning_run_main, - args, - **kwargs): - self.args = args - self.objective = objective - self.session = session - self.tuning_run_main = tuning_run_main - self.tuning_run = tuning_run - self.program = tuning_run.program - - def results_query(self, - generation=None, - objective_ordered=False, - config=None): - q = self.session.query(Result) - q = q.filter_by(tuning_run=self.tuning_run) - - if config: - q = q.filter_by(configuration=config) - - if generation is not None: - subq = (self.session.query(DesiredResult.result_id) - .filter_by(tuning_run=self.tuning_run, - generation=generation)) - q = q.filter(Result.id.in_(subq.subquery())) - - if objective_ordered: - q = self.objective.result_order_by(q) - - return q - - def requests_query(self): - q = self.session.query(DesiredResult).filter_by(tuning_run=self.tuning_run) - return q - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/#interface.py# b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/#interface.py# deleted file 
mode 100644 index 4fe23da5d904183fa4d3c340a74e89918052823e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/#interface.py# +++ /dev/null @@ -1,359 +0,0 @@ - -import abc -import argparse -import errno -import hashlib -import logging -import os -import re -import signal -import subprocess -import threading -import time -from multiprocessing.pool import ThreadPool - -try: - import resource -except ImportError: - resource = None - -try: - import fcntl -except ImportError: - fcntl = None - -import opentuner -from opentuner import resultsdb -from opentuner.resultsdb.models import * - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--parallel-compile', action='store_true', - default=False, - help="present if compiling can be done in parallel") - -the_io_thread_pool = None - - -class MeasurementInterface(object): - """ - abstract base class for compile and measurement - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, - args=None, - project_name=None, - program_name='unknown', - program_version='unknown', - manipulator=None, - objective=None, - input_manager=None): - self.args = args - self._project = project_name - self._program = program_name - self._version = program_version - self._objective = objective - self._manipulator = manipulator - self._input_manager = input_manager - - self.pids = [] - self.pid_lock = threading.Lock() - self.parallel_compile = args.parallel_compile - # If parallel_compile is False then compile_and_run() will be invoked - # sequentially otherwise the driver first invokes compile() in parallel - # followed by run_precompiled() sequentially - - def compile(self, config_data, id): - """ - Compile in PARALLEL according to the configuration in config_data - (obtained from desired_result.configuration) Should use id parameter - to determine output location of executable Return value will be passed - to 
run_precompiled as compile_result, useful for storing error/timeout - information - """ - if self.parallel_compile: - raise RuntimeError('MeasurementInterface.compile() not implemented for', - 'parallel compilation') - pass - - def run_precompiled(self, desired_result, input, limit, compile_result, id): - """ - Run the given desired_result SEQUENTIALLY on input and produce a Result() - Abort early if limit (in seconds) is reached Assume that the executable - to be measured has already been compiled to an executable corresponding to - identifier id by compile() The compile_result is the return result of compile(), - and it will be None if compile() was not called - """ - if self.parallel_compile: - raise RuntimeError('MeasurementInterface.run_precompiled() not implemented', - 'for parallel compilation') - pass - - def cleanup(self, id): - """ - Clean up any temporary files associated with the executable - """ - pass - - def pre_process(self): - """ - The process before each iteration This method will be called - once per iteration before all threads are launched - """ - pass - - def post_process(self): - """ - The process after each iteration This method will be called - once per iteration after all threads are committed - """ - pass - - def extra_convergence_criteria(self, result): - """ - The extra convergence criteria which returns True if the - current result is acceptable by the user - """ - return False - - #@abc.abstractmethod - def compile_and_run(self, desired_result, input, limit): - """ - Compile and run the given desired_result on input and produce a - Result(), abort early if limit (in seconds) is reached This function - is only used for sequential execution flow - - FIXME: Shoud uncomment @abc.abstractmethod Now comment out for - compatiability - """ - return self.run(desired_result, input, limit) - - def run(self, desired_result, input, limit): - """ - run the given desired_result on input and produce a Result(), - abort early if limit (in seconds) is 
reached - """ - return opentuner.resultdb.models.Result() - - def save_final_config(self, config): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - try: - config_str = repr(config.data) - if len(config_str) > 256: - config_str = config_str[:256] + '...' - log.info('final configuration: %s', config_str) - log.info('you may want to implement save_final_config(), to store this') - except: - log.error('error printing configuration', exc_info=True) - - def db_program_version(self, session): - """return a version identifier for the program being tuned""" - return resultsdb.models.ProgramVersion.get( - session=session, - project=self.project_name(), - name=self.program_name(), - version=self.program_version(), - parameter_info=self.manipulator().parameters_to_json(), - ) - - def set_driver(self, measurement_driver): - self.driver = measurement_driver - - def project_name(self): - if self._project is not None: - return self._project - autoname = re.sub('(Measurement?)Interface$', '', self.__class__.__name__) - if autoname: - return autoname - else: - return 'unknown' - - def program_name(self): - return self._program - - def program_version(self): - return self._version - - def file_hash(self, filename): - """helper used to generate program versions""" - return hashlib.sha256(open(filename).read()).hexdigest() - - def manipulator(self): - """ - called once to create the search.manipulator.ConfigurationManipulator - """ - if self._manipulator is None: - msg = ('MeasurementInterface.manipulator() must be implemented or a ' - '"manipulator=..." 
must be provided to the constructor') - log.error(msg) - raise Exception(msg) - return self._manipulator - - def objective(self): - """ - called once to create the search.objective.SearchObjective - """ - if self._objective is None: - from ..search.objective import MinimizeTime - - return MinimizeTime() - return self._objective - - def input_manager(self): - """ - called once to create the measurement.inputmanager.InputManager - """ - if self._objective is None: - from .inputmanager import FixedInputManager - - return FixedInputManager() - return self._input_manager - - def seed_configurations(self): - """ - Extra seed configuration objects to add to those given on the command line. - Configuration objects (typically dictionaries) not database objects. - """ - return [] - - def kill_all(self): - self.pid_lock.acquire() - for pid in self.pids: - goodkillpg(pid) - self.pids = [] - self.pid_lock.release() - - def call_program(self, cmd, limit=None, memory_limit=None, **kwargs): - """ - call cmd and kill it if it runs for longer than limit - - returns dictionary like - {'returncode': 0, - 'stdout': '', 'stderr': '', - 'timeout': False, 'time': 1.89} - """ - the_io_thread_pool_init(self.args.parallelism) - if limit is float('inf'): - limit = None - if type(cmd) in (str, unicode): - kwargs['shell'] = True - killed = False - t0 = time.time() - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - preexec_fn=preexec_setpgid_setrlimit(memory_limit), - **kwargs) - # Add p.pid to list of processes to kill in case of keyboardinterrupt - self.pid_lock.acquire() - self.pids.append(p.pid) - self.pid_lock.release() - - try: - stdout_result = the_io_thread_pool.apply_async(p.stdout.read) - stderr_result = the_io_thread_pool.apply_async(p.stderr.read) - while p.returncode is None: - if limit is None: - goodwait(p) - elif limit and time.time() > t0 + limit: - killed = True - goodkillpg(p.pid) - goodwait(p) - else: - # still waiting... 
- sleep_for = limit - (time.time() - t0) - if not stdout_result.ready(): - stdout_result.wait(sleep_for) - elif not stderr_result.ready(): - stderr_result.wait(sleep_for) - else: - #TODO(jansel): replace this with a portable waitpid - time.sleep(0.001) - p.poll() - except: - if p.returncode is None: - goodkillpg(p.pid) - raise - finally: - # No longer need to kill p - self.pid_lock.acquire() - if p.pid in self.pids: - self.pids.remove(p.pid) - self.pid_lock.release() - - t1 = time.time() - return {'time': float('inf') if killed else (t1 - t0), - 'timeout': killed, - 'returncode': p.returncode, - 'stdout': stdout_result.get(), - 'stderr': stderr_result.get()} - - def prefix_hook(self, session): - pass - - @classmethod - def main(cls, args, *pargs, **kwargs): - from opentuner.tuningrunmain import TuningRunMain - - return TuningRunMain(cls(args, *pargs, **kwargs), args).main() - - -class DefaultMeasurementInterface(MeasurementInterface): - def run(self, desired_result, input, limit): - raise RuntimeError('MeasurementInterface.run() not implemented') - - -def preexec_setpgid_setrlimit(memory_limit): - if resource is not None: - def _preexec(): - os.setpgid(0, 0) - try: - resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) - except ValueError: - pass # No permission - if memory_limit: - try: - (soft, hard) = resource.getrlimit(resource.RLIMIT_AS) - resource.setrlimit(resource.RLIMIT_AS, (min(soft, memory_limit), - min(hard, memory_limit))) - except ValueError: - pass # No permission - return _preexec - - -def the_io_thread_pool_init(parallelism=1): - global the_io_thread_pool - if the_io_thread_pool is None: - the_io_thread_pool = ThreadPool(2 * parallelism) - # make sure the threads are started up - the_io_thread_pool.map(int, range(2 * parallelism)) - - -def goodkillpg(pid): - """ - wrapper around kill to catch errors - """ - log.debug("killing pid %d", pid) - try: - if hasattr(os, 'killpg'): - os.killpg(pid, signal.SIGKILL) - else: - os.kill(pid, signal.SIGKILL) - 
except: - log.error('error killing process %s', pid, exc_info=True) - - -def goodwait(p): - """ - python doesn't check if its system calls return EINTR, retry if it does - """ - while True: - try: - rv = p.wait() - return rv - except OSError, e: - if e.errno != errno.EINTR: - raise - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/.#interface.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/.#interface.py deleted file mode 120000 index 68c682013089268d9e8f3e50ca41da1228c544c5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/.#interface.py +++ /dev/null @@ -1 +0,0 @@ -hashim@hashim-VirtualBox.2708:1511328915 \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/__init__.py deleted file mode 100644 index c289e8d6f5081d846ef431f36649b6e976df1a82..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ - -import driver -import interface -from interface import MeasurementInterface -from driver import MeasurementDriver - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/driver.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/driver.py deleted file mode 100644 index d00886920a95e2b7c61ca41b6aea0a89247ab8c9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/driver.py +++ /dev/null @@ -1,271 +0,0 @@ -import argparse -import logging -import time -import socket -import os -from multiprocessing.pool import ThreadPool -from datetime import datetime - -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm.exc import NoResultFound - -from opentuner.driverbase import 
DriverBase -from opentuner.resultsdb.models import * - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--machine-class', - help="name of the machine class being run on") - - -class MeasurementDriver(DriverBase): - """ - manages the measurement process, reading DesiredResults and creating Results - """ - - def __init__(self, - measurement_interface, - input_manager, - **kwargs): - super(MeasurementDriver, self).__init__(**kwargs) - - if not self.args.machine_class: - self.args.machine_class = 'default' - - self.interface = measurement_interface - self.input_manager = input_manager - self.commit = self.tuning_run_main.commit - self.upper_limit_multiplier = 10.0 - self.default_limit_multiplier = 2.0 - - self.laptime = time.time() - self.machine = self.get_machine() - - def get_machine(self): - """ - get (or create) the machine we are currently running on - """ - hostname = socket.gethostname() - try: - self.session.flush() - return self.session.query(Machine).filter_by(name=hostname).one() - except sqlalchemy.orm.exc.NoResultFound: - m = Machine(name=hostname, - cpu=_cputype(), - cores=_cpucount(), - memory_gb=_memorysize() / ( - 1024.0 ** 3) if _memorysize() else 0, - machine_class=self.get_machine_class()) - self.session.add(m) - return m - - def get_machine_class(self): - """ - get (or create) the machine class we are currently running on - """ - return MachineClass.get(self.session, name=self.args.machine_class) - - def run_time_limit(self, desired_result, default=3600.0 * 24 * 365 * 10): - """return a time limit to apply to a test run (in seconds)""" - best = self.results_query(objective_ordered=True).first() - if best is None: - if desired_result.limit: - return desired_result.limit - else: - return default - - if desired_result.limit: - return min(desired_result.limit, self.upper_limit_multiplier * best.time) - else: - return self.default_limit_multiplier * best.time - - def report_result(self, 
desired_result, result, input=None): - result.configuration = desired_result.configuration - result.input = input - result.machine = self.machine - result.tuning_run = self.tuning_run - result.collection_date = datetime.now() - self.session.add(result) - desired_result.result = result - desired_result.state = 'COMPLETE' - self.input_manager.after_run(desired_result, input) - result.collection_cost = self.lap_timer() - self.session.flush() # populate result.id - log.debug( - 'Result(id=%d, cfg=%d, time=%.4f, accuracy=%.2f, collection_cost=%.2f)', - result.id, - result.configuration.id, - result.time, - result.accuracy if result.accuracy is not None else float('NaN'), - result.collection_cost) - self.commit() - - def run_desired_result(self, desired_result, compile_result=None, - exec_id=None): - """ - create a new Result using input manager and measurment interface - Optional compile_result paramater can be passed to run_precompiled as - the return value of compile() - Optional exec_id paramater can be passed to run_precompiled in case of - locating a specific executable - """ - desired_result.limit = self.run_time_limit(desired_result) - - input = self.input_manager.select_input(desired_result) - self.session.add(input) - self.session.flush() - - log.debug('running desired result %s on input %s', desired_result.id, - input.id) - - self.input_manager.before_run(desired_result, input) - - if self.interface.parallel_compile: - result = self.interface.run_precompiled(desired_result, input, - desired_result.limit, - compile_result, exec_id) - else: - result = self.interface.compile_and_run(desired_result, input, - desired_result.limit) - - self.report_result(desired_result, result, input) - - def lap_timer(self): - """return the time elapsed since the last call to lap_timer""" - t = time.time() - r = t - self.laptime - self.laptime = t - return r - - def claim_desired_result(self, desired_result): - """ - claim a desired result by changing its state to running - return 
True if the result was claimed for this process - """ - self.commit() - try: - self.session.refresh(desired_result) - if desired_result.state == 'REQUESTED': - desired_result.state = 'RUNNING' - desired_result.start_date = datetime.now() - self.commit() - return True - except SQLAlchemyError: - self.session.rollback() - return False - - def query_pending_desired_results(self): - q = (self.session.query(DesiredResult) - .filter_by(tuning_run=self.tuning_run, - state='REQUESTED') - .order_by(DesiredResult.generation, - DesiredResult.priority.desc())) - return q - - def process_all(self): - """ - process all desired_results in the database - """ - self.lap_timer() # reset timer - q = self.query_pending_desired_results() - - if self.interface.parallel_compile: - desired_results = [] - thread_args = [] - - def compile_result(args): - interface, data, result_id = args - return interface.compile(data, result_id) - - for dr in q.all(): - if self.claim_desired_result(dr): - desired_results.append(dr) - thread_args.append((self.interface, dr.configuration.data, dr.id)) - if len(desired_results) == 0: - return - thread_pool = ThreadPool(len(desired_results)) - # print 'Compiling %d results' % len(thread_args) - try: - # Use map_async instead of map because of bug where keyboardinterrupts are ignored - # See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool - compile_results = thread_pool.map_async(compile_result, - thread_args).get(9999999) - except Exception: - # Need to kill other processes because only one thread receives - # exception - self.interface.kill_all() - raise - # print 'Running %d results' % len(thread_args) - for dr, compile_result in zip(desired_results, compile_results): - # Make sure compile was successful - self.run_desired_result(dr, compile_result, dr.id) - try: - self.interface.cleanup(dr.id) - except RuntimeError, e: - print e - # print 'Done!' 
- thread_pool.close() - else: - for dr in q.all(): - if self.claim_desired_result(dr): - self.run_desired_result(dr) - - -def _cputype(): - try: - return re.search(r"model name\s*:\s*([^\n]*)", - open("/proc/cpuinfo").read()).group(1) - except: - pass - try: - # for OS X - import subprocess - - return subprocess.Popen(["sysctl", "-n", "machdep.cpu.brand_string"], - stdout=subprocess.PIPE).communicate()[0].strip() - except: - log.warning("failed to get cpu type") - return "unknown" - - -def _cpucount(): - try: - return int(os.sysconf("SC_NPROCESSORS_ONLN")) - except: - pass - try: - return int(os.sysconf("_SC_NPROCESSORS_ONLN")) - except: - pass - try: - return int(os.environ["NUMBER_OF_PROCESSORS"]) - except: - pass - try: - return int(os.environ["NUM_PROCESSORS"]) - except: - log.warning("failed to get the number of processors") - return 1 - - -def _memorysize(): - try: - return int(os.sysconf("SC_PHYS_PAGES") * os.sysconf("SC_PAGE_SIZE")) - except: - pass - try: - return int(os.sysconf("_SC_PHYS_PAGES") * os.sysconf("_SC_PAGE_SIZE")) - except: - pass - try: - # for OS X - import subprocess - - return int(subprocess.Popen(["sysctl", "-n", "hw.memsize"], - stdout=subprocess.PIPE) - .communicate()[0].strip()) - except: - log.warning("failed to get total memory") - return 1024 ** 3 - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/inputmanager.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/inputmanager.py deleted file mode 100644 index 7acaeaa0cfa178c7e62716a29cca2e9497f255d1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/inputmanager.py +++ /dev/null @@ -1,76 +0,0 @@ -import abc -import opentuner -from opentuner.resultsdb.models import * - - -class InputManager(object): - """ - abstract base class for compile and measurement - """ - __metaclass__ = abc.ABCMeta - - def set_driver(self, measurement_driver): - self.driver = 
measurement_driver - self.session = measurement_driver.session - self.program = measurement_driver.tuning_run.program - - @abc.abstractmethod - def select_input(self, desired_result): - """ - select the input to be used to test desired_result - """ - return opentuner.resultsdb.models.Input() - - - def before_run(self, desired_result, input): - """hook called before an input is used""" - pass - - def after_run(self, desired_result, input): - """hook called after an input is used""" - pass - - def get_input_class(self): - return None - - -class FixedInputManager(InputManager): - """ - an input manage that produces a single input for all tests - """ - - def __init__(self, - input_class_name='fixed', - size=-1, - path=None, - extra=None): - self.input_class_name = input_class_name - self.size = size - self.path = path - self.extra = extra - self.the_input = None - super(FixedInputManager, self).__init__() - - - def get_input_class(self): - return InputClass.get(self.session, - program=self.program, - name=self.input_class_name, - size=self.size) - - def create_input(self, desired_result): - """create the fixed input database object, result will be cached""" - return Input(input_class=self.get_input_class(), - path=self.path, - extra=self.extra) - - def select_input(self, desired_result): - if self.the_input is None: - self.the_input = self.create_input(desired_result) - return self.the_input - - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/interface.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/interface.py deleted file mode 100644 index 174902488289fe4ef038a9dd3553ea13acc68f2b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/measurement/interface.py +++ /dev/null @@ -1,366 +0,0 @@ -import abc -import argparse -import errno -import hashlib -import logging -import os -import re -import signal -import subprocess -import threading -import 
time -from multiprocessing.pool import ThreadPool - -try: - import resource -except ImportError: - resource = None - -try: - import fcntl -except ImportError: - fcntl = None - -import opentuner -from opentuner import resultsdb -from opentuner.resultsdb.models import * - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--parallel-compile', action='store_true', - default=False, - help="present if compiling can be done in parallel") - -the_io_thread_pool = None - - -class MeasurementInterface(object): - """ - abstract base class for compile and measurement - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, - args=None, - project_name=None, - program_name='unknown', - program_version='unknown', - manipulator=None, - objective=None, - input_manager=None): - self.args = args - self._project = project_name - self._program = program_name - self._version = program_version - self._objective = objective - self._manipulator = manipulator - self._input_manager = input_manager - - self.pids = [] - self.pid_lock = threading.Lock() - self.parallel_compile = args.parallel_compile - # If parallel_compile is False then compile_and_run() will be invoked - # sequentially otherwise the driver first invokes compile() in parallel - # followed by run_precompiled() sequentially - - def compile(self, config_data, id): - """ - Compile in PARALLEL according to the configuration in config_data - (obtained from desired_result.configuration) Should use id parameter - to determine output location of executable Return value will be passed - to run_precompiled as compile_result, useful for storing error/timeout - information - """ - if self.parallel_compile: - raise RuntimeError('MeasurementInterface.compile() not implemented for', - 'parallel compilation') - pass - - def run_precompiled(self, desired_result, input, limit, compile_result, id): - """ - Run the given desired_result SEQUENTIALLY on input and produce a Result() - 
Abort early if limit (in seconds) is reached Assume that the executable - to be measured has already been compiled to an executable corresponding to - identifier id by compile() The compile_result is the return result of compile(), - and it will be None if compile() was not called - """ - if self.parallel_compile: - raise RuntimeError('MeasurementInterface.run_precompiled() not implemented', - 'for parallel compilation') - pass - - def cleanup(self, id): - """ - Clean up any temporary files associated with the executable - """ - pass - - def pre_process(self): - """ - The process before each iteration This method will be called - once per iteration before all threads are launched - """ - pass - - def post_process(self): - """ - The process after each iteration This method will be called - once per iteration after all threads are committed - """ - pass - - def extra_convergence_criteria(self, result): - """ - The extra convergence criteria which returns True if the - current result is acceptable by the user - """ - return False - - #@abc.abstractmethod - def compile_and_run(self, desired_result, input, limit): - """ - Compile and run the given desired_result on input and produce a - Result(), abort early if limit (in seconds) is reached This function - is only used for sequential execution flow - - FIXME: Shoud uncomment @abc.abstractmethod Now comment out for - compatiability - """ - return self.run(desired_result, input, limit) - - def run(self, desired_result, input, limit): - """ - run the given desired_result on input and produce a Result(), - abort early if limit (in seconds) is reached - """ - return opentuner.resultdb.models.Result() - - def save_final_config(self, config): - """ - called at the end of autotuning with the best resultsdb.models.Configuration - """ - try: - config_str = repr(config.data) - if len(config_str) > 256: - config_str = config_str[:256] + '...' 
- log.info('final configuration: %s', config_str) - log.info('you may want to implement save_final_config(), to store this') - except: - log.error('error printing configuration', exc_info=True) - - def db_program_version(self, session): - """return a version identifier for the program being tuned""" - return resultsdb.models.ProgramVersion.get( - session=session, - project=self.project_name(), - name=self.program_name(), - version=self.program_version(), - parameter_info=self.manipulator().parameters_to_json(), - ) - - def set_driver(self, measurement_driver): - self.driver = measurement_driver - - def project_name(self): - if self._project is not None: - return self._project - autoname = re.sub('(Measurement?)Interface$', '', self.__class__.__name__) - if autoname: - return autoname - else: - return 'unknown' - - def program_name(self): - return self._program - - def program_version(self): - return self._version - - def file_hash(self, filename): - """helper used to generate program versions""" - return hashlib.sha256(open(filename).read()).hexdigest() - - def manipulator(self): - """ - called once to create the search.manipulator.ConfigurationManipulator - """ - if self._manipulator is None: - msg = ('MeasurementInterface.manipulator() must be implemented or a ' - '"manipulator=..." must be provided to the constructor') - log.error(msg) - raise Exception(msg) - return self._manipulator - - def objective(self): - """ - called once to create the search.objective.SearchObjective - """ - if self._objective is None: - from ..search.objective import MinimizeSize - - return MinimizeSize() - return self._objective - - def input_manager(self): - """ - called once to create the measurement.inputmanager.InputManager - """ - if self._objective is None: - from .inputmanager import FixedInputManager - - return FixedInputManager() - return self._input_manager - - def seed_configurations(self): - """ - Extra seed configuration objects to add to those given on the command line. 
- Configuration objects (typically dictionaries) not database objects. - """ - return [] - - def kill_all(self): - self.pid_lock.acquire() - for pid in self.pids: - goodkillpg(pid) - self.pids = [] - self.pid_lock.release() - - def call_program(self, cmd, limit=None, memory_limit=None, **kwargs): - """ - call cmd and kill it if it runs for longer than limit - - returns dictionary like - {'returncode': 0, - 'stdout': '', 'stderr': '', - 'timeout': False, 'time': 1.89} - """ - the_io_thread_pool_init(self.args.parallelism) - if limit is float('inf'): - limit = None - if type(cmd) in (str, unicode): - kwargs['shell'] = True - killed = False - t0 = time.time() - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - preexec_fn=preexec_setpgid_setrlimit(memory_limit), - **kwargs) - # Add p.pid to list of processes to kill in case of keyboardinterrupt - self.pid_lock.acquire() - self.pids.append(p.pid) - self.pid_lock.release() - - try: - stdout_result = the_io_thread_pool.apply_async(p.stdout.read) - stderr_result = the_io_thread_pool.apply_async(p.stderr.read) - while p.returncode is None: - if limit is None: - goodwait(p) - elif limit and time.time() > t0 + limit: - killed = True - goodkillpg(p.pid) - goodwait(p) - else: - # still waiting... 
- sleep_for = limit - (time.time() - t0) - if not stdout_result.ready(): - stdout_result.wait(sleep_for) - elif not stderr_result.ready(): - stderr_result.wait(sleep_for) - else: - #TODO(jansel): replace this with a portable waitpid - time.sleep(0.001) - p.poll() - except: - if p.returncode is None: - goodkillpg(p.pid) - raise - finally: - # No longer need to kill p - self.pid_lock.acquire() - if p.pid in self.pids: - self.pids.remove(p.pid) - self.pid_lock.release() - - # TODO-autotune: Extract the file size and use it - # FIXIT: Appropriately update the file size - t1 = time.time() - return {'time': float('inf') if killed else (t1 - t0), - 'timeout': killed, - 'returncode': p.returncode, - 'stdout': stdout_result.get(), - 'stderr': stderr_result.get(), - } - - def getFileSize(self,filename): - fileinfo=os.stat(filename) - file_size=fileinfo.st_size - return {'binary_size': file_size} - - def prefix_hook(self, session): - pass - - @classmethod - def main(cls, args, *pargs, **kwargs): - from opentuner.tuningrunmain import TuningRunMain - - return TuningRunMain(cls(args, *pargs, **kwargs), args).main() - - -class DefaultMeasurementInterface(MeasurementInterface): - def run(self, desired_result, input, limit): - raise RuntimeError('MeasurementInterface.run() not implemented') - - -def preexec_setpgid_setrlimit(memory_limit): - if resource is not None: - def _preexec(): - os.setpgid(0, 0) - try: - resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) - except ValueError: - pass # No permission - if memory_limit: - try: - (soft, hard) = resource.getrlimit(resource.RLIMIT_AS) - resource.setrlimit(resource.RLIMIT_AS, (min(soft, memory_limit), - min(hard, memory_limit))) - except ValueError: - pass # No permission - return _preexec - - -def the_io_thread_pool_init(parallelism=1): - global the_io_thread_pool - if the_io_thread_pool is None: - the_io_thread_pool = ThreadPool(2 * parallelism) - # make sure the threads are started up - the_io_thread_pool.map(int, range(2 * 
parallelism)) - - -def goodkillpg(pid): - """ - wrapper around kill to catch errors - """ - log.debug("killing pid %d", pid) - try: - if hasattr(os, 'killpg'): - os.killpg(pid, signal.SIGKILL) - else: - os.kill(pid, signal.SIGKILL) - except: - log.error('error killing process %s', pid, exc_info=True) - - -def goodwait(p): - """ - python doesn't check if its system calls return EINTR, retry if it does - """ - while True: - try: - rv = p.wait() - return rv - except OSError, e: - if e.errno != errno.EINTR: - raise - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/__init__.py deleted file mode 100644 index a0150a1577e22cdfd50e490bb4a0c6b735bfcac8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ - -from connect import connect - -import models - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/connect.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/connect.py deleted file mode 100644 index 1a04d05447a3b62d241a4f2402c22cac15b98b3b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/connect.py +++ /dev/null @@ -1,66 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import scoped_session, sessionmaker -from models import Base, _Meta -import logging -import time -from pprint import pprint - -log = logging.getLogger(__name__) - -DB_VERSION = "0.0" - -if False: # profiling of queries - import atexit - from sqlalchemy import event - from collections import Counter - from sqlalchemy.engine import Engine - the_query_totals = Counter() - - @event.listens_for(Engine, "before_cursor_execute") - def before_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - context._query_start_time = 
time.time() - - @event.listens_for(Engine, "after_cursor_execute") - def after_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - total = time.time() - context._query_start_time - the_query_totals[statement] += total - - @atexit.register - def report(): - pprint(the_query_totals.most_common(10)) - - -def connect(dbstr): - engine = create_engine(dbstr, echo = False) - connection = engine.connect() - - #handle case that the db was initialized before a version table existed yet - if engine.dialect.has_table(connection, "program"): - # if there are existing tables - if not engine.dialect.has_table(connection, "_meta"): - # if no version table, assume outdated db version and error - connection.close() - raise Exception("Your opentuner database is currently out of date. Save a back up and reinitialize") - - # else if we have the table already, make sure version matches - if engine.dialect.has_table(connection, "_meta"): - Session = scoped_session(sessionmaker(autocommit=False, - autoflush=False, - bind=engine)) - version = _Meta.get_version(Session) - if not DB_VERSION == version: - raise Exception('Your opentuner database version {} is out of date with the current version {}'.format(version, DB_VERSION)) - - Base.metadata.create_all(engine) - - Session = scoped_session(sessionmaker(autocommit=False, - autoflush=False, - bind=engine)) - # mark database with current version - _Meta.add_version(Session, DB_VERSION) - Session.commit() - - return engine, Session - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/models.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/models.py deleted file mode 100644 index dd88ae8e51c0d94db2364cbc444b9a11d2667116..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/resultsdb/models.py +++ /dev/null @@ -1,319 +0,0 @@ -from sqlalchemy.ext.declarative import declarative_base -from 
sqlalchemy.ext.declarative import declared_attr -from sqlalchemy import create_engine -from sqlalchemy.orm import relationship, backref -from sqlalchemy import ( - Column, Integer, String, DateTime, Boolean, Enum, - Float, PickleType, ForeignKey, Text, func, Index) -import sqlalchemy -import re - -from cPickle import dumps, loads -from gzip import zlib -class CompressedPickler(object): - @classmethod - def dumps(cls, obj, protocol=2): - s = dumps(obj, protocol) - sz = zlib.compress(s, 9) - if len(sz) < len(s): - return sz - else: - return s - - @classmethod - def loads(cls, string): - try: - s = zlib.decompress(string) - except: - s = string - return loads(s) - -class Base(object): - @declared_attr - def __tablename__(cls): - """convert camel case to underscores""" - return re.sub(r'([a-z])([A-Z])', r'\1_\2', cls.__name__).lower() - - id = Column(Integer, primary_key=True, index=True) - - -Base = declarative_base(cls=Base) - -class _Meta(Base): - """ meta table to track current version """ - db_version = Column(String(128)) - - @classmethod - def has_version(cls, session, version): - try: - session.flush() - session.query(_Meta).filter_by(db_version=version).one() - return True - except sqlalchemy.orm.exc.NoResultFound: - return False - - @classmethod - def get_version(cls, session): - try: - session.flush() - x = session.query(_Meta).one() - return x.db_version - except sqlalchemy.orm.exc.NoResultFound: - return None - - @classmethod - def add_version(cls, session, version): - if not cls.has_version(session, version): - session.add(_Meta(db_version=version)) - - -class Program(Base): - project = Column(String(128)) - name = Column(String(128)) - - @classmethod - def get(cls, session, project, name): - try: - session.flush() - return session.query(Program).filter_by(project=project, name=name).one() - except sqlalchemy.orm.exc.NoResultFound: - t = Program(project=project, name=name) - session.add(t) - return t - - -class ProgramVersion(Base): - program_id = 
Column(ForeignKey(Program.id)) - program = relationship(Program, backref='versions') - version = Column(String(128)) - parameter_info = Column(Text) - - @property - def name(self): - return self.program.name - - @property - def project(self): - return self.program.project - - @classmethod - def get(cls, session, project, name, version, parameter_info=None): - program = Program.get(session, project, name) - try: - session.flush() - if parameter_info is None: - return session.query(ProgramVersion).filter_by(program=program, - version=version).one() - else: - return session.query(ProgramVersion).filter_by(program=program, - version=version, - parameter_info=parameter_info).one() - except sqlalchemy.orm.exc.NoResultFound: - t = ProgramVersion(program=program, version=version, parameter_info=parameter_info) - session.add(t) - return t - - -class Configuration(Base): - program_id = Column(ForeignKey(Program.id)) - program = relationship(Program) - hash = Column(String(64)) - data = Column(PickleType(pickler=CompressedPickler)) - - @classmethod - def get(cls, session, program, hashv, datav): - try: - session.flush() - return (session.query(Configuration) - .filter_by(program=program, hash=hashv).one()) - except sqlalchemy.orm.exc.NoResultFound: - t = Configuration(program=program, hash=hashv, data=datav) - session.add(t) - return t - - -Index('ix_configuration_custom1', Configuration.program_id, Configuration.hash) - - -class MachineClass(Base): - name = Column(String(128)) - - @classmethod - def get(cls, session, name): - try: - session.flush() - return session.query(MachineClass).filter_by(name=name).one() - except sqlalchemy.orm.exc.NoResultFound: - t = MachineClass(name=name) - session.add(t) - return t - - -class Machine(Base): - name = Column(String(128)) - - cpu = Column(String(128)) - cores = Column(Integer) - memory_gb = Column(Float) - - machine_class_id = Column(ForeignKey(MachineClass.id)) - machine_class = relationship(MachineClass, backref='machines') - - 
-class InputClass(Base): - program_id = Column(ForeignKey(Program.id)) - program = relationship(Program, backref='inputs') - - name = Column(String(128)) - size = Column(Integer) - - @classmethod - def get(cls, session, program, name='default', size=-1): - try: - session.flush() - return session.query(InputClass).filter_by(program=program, - name=name, - size=size).one() - except sqlalchemy.orm.exc.NoResultFound: - t = InputClass(program=program, name=name, size=size) - session.add(t) - return t - - -class Input(Base): - #state = Column(Enum('ANY_MACHINE', 'SINGLE_MACHINE', 'DELETED'), - # default='ANY_MACHINE', name='t_input_state') - - input_class_id = Column(ForeignKey(InputClass.id)) - input_class = relationship(InputClass, backref='inputs') - - #optional, set only for state='SINGLE_MACHINE' - #machine_id = Column(ForeignKey(MachineClass.id)) - #machine = relationship(MachineClass, backref='inputs') - - #optional, for use by InputManager - path = Column(Text) - extra = Column(PickleType(pickler=CompressedPickler)) - - -class TuningRun(Base): - uuid = Column(String(32), index=True, unique=True) - - program_version_id = Column(ForeignKey(ProgramVersion.id)) - program_version = relationship(ProgramVersion, backref='tuning_runs') - - machine_class_id = Column(ForeignKey(MachineClass.id)) - machine_class = relationship(MachineClass, backref='tuning_runs') - - input_class_id = Column(ForeignKey(InputClass.id)) - input_class = relationship(InputClass, backref='tuning_runs') - - name = Column(String(128), default='unnamed') - args = Column(PickleType(pickler=CompressedPickler)) - objective = Column(PickleType(pickler=CompressedPickler)) - - state = Column(Enum('QUEUED', 'RUNNING', 'COMPLETE', 'ABORTED', - name='t_tr_state'), - default='QUEUED') - start_date = Column(DateTime, default=func.now()) - end_date = Column(DateTime) - - final_config_id = Column(ForeignKey(Configuration.id)) - final_config = relationship(Configuration) - - #__mapper_args__ = {'primary_key': 
uuid} - - @property - def program(self): - return self.program_version.program - - -class Result(Base): - #set by MeasurementDriver: - configuration_id = Column(ForeignKey(Configuration.id)) - configuration = relationship(Configuration) - - machine_id = Column(ForeignKey(Machine.id)) - machine = relationship(Machine, backref='results') - - input_id = Column(ForeignKey(Input.id)) - input = relationship(Input, backref='results') - - tuning_run_id = Column(ForeignKey(TuningRun.id), index=True) - tuning_run = relationship(TuningRun, backref='results') - - collection_date = Column(DateTime, default=func.now()) - collection_cost = Column(Float) - - #set by MeasurementInterface: - state = Column(Enum('OK', 'TIMEOUT', 'ERROR', - name='t_result_state'), - default='OK') - time = Column(Float) - accuracy = Column(Float) - energy = Column(Float) - size = Column(Float) - confidence = Column(Float) - #extra = Column(PickleType) - - #set by SearchDriver - was_new_best = Column(Boolean) - - -Index('ix_result_custom1', Result.tuning_run_id, Result.was_new_best) - - -class DesiredResult(Base): - #set by the technique: - configuration_id = Column(ForeignKey(Configuration.id)) - configuration = relationship(Configuration) - limit = Column(Float) - - #set by the search driver - priority = Column(Float) - tuning_run_id = Column(ForeignKey(TuningRun.id)) - tuning_run = relationship(TuningRun, backref='desired_results') - generation = Column(Integer) - requestor = Column(String(128)) - request_date = Column(DateTime, default=func.now()) - - #set by the measurement driver - state = Column(Enum('UNKNOWN', 'REQUESTED', 'RUNNING', - 'COMPLETE', 'ABORTED', - name="t_dr_state"), - default='UNKNOWN') - result_id = Column(ForeignKey(Result.id), index=True) - result = relationship(Result, backref='desired_results') - start_date = Column(DateTime) - - #input_id = Column(ForeignKey(Input.id)) - #input = relationship(Input, backref='desired_results') - - -Index('ix_desired_result_custom1', 
DesiredResult.tuning_run_id, - DesiredResult.generation) - -Index('ix_desired_result_custom2', DesiredResult.tuning_run_id, - DesiredResult.configuration_id) - - -# track bandit meta-technique information if a bandit meta-technique is used for a tuning run. -class BanditInfo(Base): - tuning_run_id = Column(ForeignKey(TuningRun.id)) - tuning_run = relationship(TuningRun, backref='bandit_info') - # the bandit exploration/exploitation tradeoff - c = Column(Float) - # the bandit window - window = Column(Integer) - -class BanditSubTechnique(Base): - bandit_info_id = Column(ForeignKey(BanditInfo.id)) - bandit_info = relationship(BanditInfo, backref='subtechniques') - name = Column(String(128)) - - -if __name__ == '__main__': - #test: - engine = create_engine('sqlite:///:memory:', echo=True) - Base.metadata.create_all(engine) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/__init__.py deleted file mode 100644 index bb4ce57bb2d1760bd9fb6ebe196f39072a43ab4a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ - -import driver -import objective -import plugin -import technique - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/bandittechniques.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/bandittechniques.py deleted file mode 100644 index 29816c03de1c52b4b6318991faafb488952e4019..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/bandittechniques.py +++ /dev/null @@ -1,316 +0,0 @@ -import abc -import copy -import logging -import math -import random -from collections import deque - -from .metatechniques import MetaSearchTechnique -from .technique import register, SearchTechnique, all_techniques, get_random_generator_technique - -log = 
logging.getLogger(__name__) - - -class BanditQueue(object): - def __init__(self, keys, C=0.05, window=500, **kwargs): - """ - C is exploration/exploitation tradeoff - window is how long to remember past results - """ - super(BanditQueue, self).__init__(**kwargs) - self.C = C - self.history = deque() - self.keys = keys - self.use_counts = dict(((k, 0) for k in keys)) - self.window = window - self.request_count = 0 - - @abc.abstractmethod - def exploitation_term(self, key): - """ - value 0 to 1.0 to represent quality of technique - """ - return 0.0 - - def exploration_term(self, key): - """ - value represent how unsure we are (optimal bandit solution) - """ - if self.use_counts[key] > 0: - return math.sqrt((2.0 * math.log(len(self.history), 2.0)) - / self.use_counts[key]) - else: - return float('inf') - - def bandit_score(self, key): - return (self.exploitation_term(key) + - self.C * self.exploration_term(key)) - - def ordered_keys(self): - """select the next technique to use""" - - keys = list(self.keys) - random.shuffle(keys) # break ties randomly - keys.sort(key=self.bandit_score) - - self.request_count += 1 - if log.isEnabledFor(logging.DEBUG) and (self.request_count % 1000) == 0: - log.debug(str([ - (t, self.exploitation_term(t), self.C * self.exploration_term(t)) - for t in keys])) - - return reversed(keys) - - def on_result(self, key, value): - self.history.append((key, value)) - self.on_push_history(key, value) - if len(self.history) > self.window: - self.on_pop_history(*self.history.popleft()) - - def on_push_history(self, key, value): - self.use_counts[key] += 1 - - def on_pop_history(self, key, value): - self.use_counts[key] -= 1 - - -class AUCBanditQueue(BanditQueue): - """ - Area Under the Receiving Operator Curve (AUC) credit assignment - - See: - Comparison-based adaptive strategy selection with bandits in differential - evolution. Fialho et al. 
- """ - - def __init__(self, *args, **kwargs): - super(AUCBanditQueue, self).__init__(*args, **kwargs) - self.debug = kwargs.get('debug', False) - self.auc_sum = dict(((t, 0) for t in self.keys)) - self.auc_decay = dict(((t, 0) for t in self.keys)) - - def exploitation_term_slow(self, key): - """ - value 0 to 1.0 to represent quality of key - - computes the area under the curve where finding a new - global best results in adding 1 to a cumulative total - """ - score = 0.0 - pos = 0 - for t, value in self.history: - if t is key: - pos += 1 - if value: - score += pos - if pos: - return score * 2.0 / (pos * (pos + 1.0)) - else: - return 0.0 - - def exploitation_term_fast(self, key): - """ - value 0 to 1.0 to represent quality of key - - optimized O(1) implementation exploitation_term_slow() - """ - score = self.auc_sum[key] - pos = self.use_counts[key] - if pos: - return score * 2.0 / (pos * (pos + 1.0)) - else: - return 0.0 - - def exploitation_term(self, key): - v1 = self.exploitation_term_fast(key) - if self.debug: - v2 = self.exploitation_term_slow(key) - assert v1 == v2 - return v1 - - def on_push_history(self, key, value): - super(AUCBanditQueue, self).on_push_history(key, value) - if value: - self.auc_sum[key] += self.use_counts[key] - self.auc_decay[key] += 1 - - def on_pop_history(self, key, value): - super(AUCBanditQueue, self).on_pop_history(key, value) - self.auc_sum[key] -= self.auc_decay[key] - if value: - self.auc_decay[key] -= 1 - - - -class AUCBanditMetaTechnique(MetaSearchTechnique): - def __init__(self, techniques, bandit_kwargs=dict(), **kwargs): - super(AUCBanditMetaTechnique, self).__init__(techniques, **kwargs) - self.bandit = AUCBanditQueue([t.name for t in techniques], **bandit_kwargs) - self.name_to_technique = dict(((t.name, t) for t in self.techniques)) - - def select_technique_order(self): - """select the next technique to use""" - return (self.name_to_technique[k] for k in self.bandit.ordered_keys()) - - def on_technique_result(self, 
technique, result): - self.bandit.on_result(technique.name, result.was_new_best) - - def on_technique_no_desired_result(self, technique): - """treat not providing a configuration as not a best""" - self.bandit.on_result(technique.name, 0) - - @classmethod - def generate_technique(cls, manipulator=None, num_techniques=5, retry_count=3, generator_weight=10, *args, **kwargs): - """ - Generate a bandit by randomly selecting existing techniques or composable techniques. - If a composable technique is selected, the operators are then chosen - - :param manipulator: a ConfigurationManipulator used to enumerate parameters - :param num_techniques: max number of subtechniques in the bandit - :param retry_count: number of times to try getting a new technique before giving up - :param generator_weight: weight to increase probability of choosing to generate a technique - """ - techniques, generators = all_techniques() - - # get set of parameters to consider - paramset = set() - for p in manipulator.params: - paramset.add(type(p)) - - # filter techniques to get rid of metatechniques - basetechniques = [t for t in techniques if not isinstance(t, MetaSearchTechnique)] - bandit_techniques = [] - for i in range(num_techniques): - for j in range(retry_count): - # pick a technique or generate a composable - if random.random() < float(len(basetechniques)) / (len(basetechniques) + generator_weight*len(generators)): - candidate = copy.deepcopy(random.choice(basetechniques)) - else: - # pick a random generator - candidate = get_random_generator_technique(generators, manipulator=manipulator) - if not (candidate.name in [t.name for t in bandit_techniques]): - bandit_techniques.append(candidate) - break - - # make a bandit of the output list - return cls(bandit_techniques, name="GeneratedBandit", *args, **kwargs) - - -class AUCBanditMutationTechnique(SearchTechnique): - def __init__(self, bandit_kwargs=dict(), **kwargs): - super(AUCBanditMutationTechnique, self).__init__(**kwargs) - 
self.bandit = None - self.bandit_kwargs = bandit_kwargs - self.pending_results = [] - - def handle_requested_result(self, result): - for i in xrange(len(self.pending_results)): - cfg, name, index = self.pending_results[i] - if result.configuration == cfg: - self.bandit.on_result((name, index), result.was_new_best) - del self.pending_results[i] - return - log.warning("unexpected result") - - def desired_configuration(self): - """ - use bandit to pick a single manipulator and apply it - """ - seed = self.get_seed() - if self.bandit is None: - self.init_bandit(seed) - - cfg = self.manipulator.copy(seed) - hash1 = self.manipulator.hash_config(cfg) - params = self.manipulator.parameters_dict(cfg) - for name, index in self.bandit.ordered_keys(): - if name in params: - param = params[name] - fns = param.manipulators(cfg) - fn = fns[index % len(fns)] - fn(cfg) - hash2 = self.manipulator.hash_config(cfg) - if hash1 != hash2: - cfg = self.driver.get_configuration(cfg) - self.pending_results.append((cfg, name, index)) - log.debug("applied %s[%s] manipulator function", name, index) - return cfg - - return None - - - def init_bandit(self, cfg): - options = [] - for param in self.manipulator.parameters(cfg): - for i in xrange(len(param.manipulators(cfg))): - options.append((param.name, i)) - # TODO(jansel): remove assumption that set of parameters are fixed - self.bandit = AUCBanditQueue(options, **self.bandit_kwargs) - - def get_seed(self): - """seed mutation with global best""" - if (self.driver.best_result is not None and - self.driver.best_result.state == 'OK'): - return self.driver.best_result.configuration.data - else: - return self.manipulator.random() - - -import evolutionarytechniques -import differentialevolution -import simplextechniques -import patternsearch -import simulatedannealing -from pso import PSO, HybridParticle -import globalGA -register(AUCBanditMutationTechnique()) - -register(AUCBanditMetaTechnique([ - differentialevolution.DifferentialEvolutionAlt(), - 
evolutionarytechniques.UniformGreedyMutation(), - evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3), - simplextechniques.RandomNelderMead(), - ], name = "AUCBanditMetaTechniqueA")) -register(AUCBanditMetaTechnique([ - differentialevolution.DifferentialEvolutionAlt(), - evolutionarytechniques.UniformGreedyMutation(), - ], name = "AUCBanditMetaTechniqueB")) -register(AUCBanditMetaTechnique([ - differentialevolution.DifferentialEvolutionAlt(), - patternsearch.PatternSearch(), - ], name = "AUCBanditMetaTechniqueC")) -register(AUCBanditMetaTechnique([ - PSO(crossover = 'op3_cross_OX3'), - PSO(crossover = 'op3_cross_OX1'), - PSO(crossover = 'op3_cross_CX'), - PSO(crossover = 'op3_cross_PMX'), - PSO(crossover = 'op3_cross_PX'), - evolutionarytechniques.GA(crossover = 'op3_cross_OX3', mutation_rate=0.01, crossover_rate=0.8), - evolutionarytechniques.GA(crossover = 'op3_cross_OX1', mutation_rate=0.01, crossover_rate=0.8), - evolutionarytechniques.GA(crossover = 'op3_cross_CX', mutation_rate=0.01, crossover_rate=0.8), - evolutionarytechniques.GA(crossover = 'op3_cross_PX', mutation_rate=0.01, crossover_rate=0.8), - evolutionarytechniques.GA(crossover = 'op3_cross_PMX', mutation_rate=0.01, crossover_rate=0.8), - evolutionarytechniques.UniformGreedyMutation(name='ga-base', mutation_rate=0.01) - ], name = "PSO_GA_Bandit")) -register(AUCBanditMetaTechnique([ - differentialevolution.DifferentialEvolutionAlt(), - simulatedannealing.PseudoAnnealingSearch() - ], name = "test")) -register(AUCBanditMetaTechnique([ - differentialevolution.DifferentialEvolutionAlt(), - evolutionarytechniques.UniformGreedyMutation(), - evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3), - simplextechniques.RandomNelderMead(), - simulatedannealing.PseudoAnnealingSearch() - ], name = "test2")) -register(AUCBanditMetaTechnique([ - PSO(crossover='op3_cross_OX1'), - PSO(crossover='op3_cross_PMX'), - PSO(crossover='op3_cross_PX'), - evolutionarytechniques.GA(crossover='op3_cross_OX1', 
crossover_rate=0.5), - evolutionarytechniques.GA(crossover='op3_cross_PMX', crossover_rate=0.5), - evolutionarytechniques.GA(crossover='op3_cross_PX', crossover_rate=0.5), - differentialevolution.DifferentialEvolutionAlt(), - globalGA.NormalGreedyMutation( crossover_rate=0.5, crossover_strength=0.2, name='GGA') - ], name='PSO_GA_DE')) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/composableevolutionarytechniques.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/composableevolutionarytechniques.py deleted file mode 100644 index e511744f30b8a7d271539e4ed26e247b5574c2b5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/composableevolutionarytechniques.py +++ /dev/null @@ -1,512 +0,0 @@ -import random -import time -import sys -import json -from fn import _ -from technique import all_techniques -from technique import register -from technique import register_generator -from technique import SequentialSearchTechnique -from manipulator import * -from opentuner.search.manipulator import Parameter - - -class PopulationMember(object): - """ - An extendable object representing a population member for ComposableEvolutionaryTechniques. - Must have the field "config" which is a configuration - """ - def __init__(self, config): - self.config = config - self.timestamp = time.time() - - def touch(self): - """ - Update the timestamp on a PopulationMember - """ - self.timestamp = time.time() - - -class ComposableEvolutionaryTechnique(SequentialSearchTechnique): - """ - An abstract base class for a technique that is composable with operators - """ - __metaclass__ = abc.ABCMeta - - # operator_map - from param-type to dict with operator name + list of arguments TODO - # min_parent - minimum number of parents returned. 
Limits which operators can be used - def __init__(self, - operator_map = {}, - population_size = 50, - initial_configs = None, - *pargs, - **kwargs): - """ - - :param operator_map: - :param population_size: - :param initial_configs: - :param pargs: - :param kwargs: - :return: - """ - super(ComposableEvolutionaryTechnique, self).__init__(*pargs, **kwargs) - # generate a name based on operators if no name - - self.initial_configurations = initial_configs - self.population_size = population_size - self.operator_map = operator_map # map from parameter type to an operator function - - def set_operator_map(self, operator_map): - self.operator_map = operator_map - - @classmethod - def get_hyper_parameters(cls): - return ['population_size'] - - def default_generated_name(self): - """ - Gets the default name for this technique based on its operator map - - Name is in the format - classname paramname;opname;[arg1,arg2,[[kwarg1,v1][kwarg2,v2]]] paramname2;opname2;... - """ - # TODO - include technique hyperparameters - parts = [self.base_name()] - for param in sorted(self.operator_map, cmp=lambda x,y: cmp(x.__name__, y.__name__)): - subparts = [param.__name__] - operator_info = self.operator_map[param] - subparts.append(operator_info['op_name']) - args = list(operator_info['args']) - kwargs = operator_info['kwargs'] - args.append([(k,kwargs[k]) for k in sorted(kwargs)]) - subparts.append(json.dumps(args, separators=(',', ':'))) - parts.append(';'.join(subparts)) - return ' '.join(parts) - - - def make_population_member(self, config): - """ - Given a configuration, returns an object representing a single member of the - population with the given configuration. Meta-data about the configuration, - such as last selection time as a parent, can be attached to the object. 
- - This can be overriden to return a custom population member for use in - :py:meth:`get_parents` and :py:meth:`update_population` - - :param config: the configuration that this population member will represent - :return: a population member reresenting the input configuration. - """ - return PopulationMember(config) - - def select_parameters(self, params): - """ - Given all the available parameters, return a subset of parameters to operate - on when generating a new configuration. - - Can override this to operate on only a subset of parameters. - - :param params: a list of all the available parameters - :return: a subset of params - """ - return params - - @abc.abstractmethod - def minimum_number_of_parents(self): - """ - Return the minimum number of parents ever returned by :py:meth:`get_parents`. - This limits which operators can be composed with the technique. Operators - requiring more input configurations than the minimum number of parents will - result in an error. - - :return: the minimum number of parents ever generated. - """ - return 1 - - @abc.abstractmethod - def get_parents(self, population): - """ - Given the current population, return a list of configurations that will be - used to generate a new configuration via operators. Returning less parents - than guaranteed by :py:meth:`minimum_number_of_parents` results in an error. - - The parents will be passed to operators in order. If there are more parents - than required by an operator, extra parents will be discarded. - - Note that operators mutate the first configuration passed in. - - :param population: the current population in the technique - :return: a list of parent configurations to generate a new configuration from - """ - pass - - @abc.abstractmethod - def update_population(self, config, population): - """ - Update the population given the newest configuration and current population - in the technique. 
should return the new population - - :param config: the newest generated configuration - :param population: the current population in this iteration of the technique - :return: the updated population - """ - pass - - def get_initial_population(self): - """ - Returns an initial population by passing initial configurations into - :py:meth:`make_population_member` - - :return: an initial list of objects returned by :py:meth:`make_population_member`. - """ - init_configs = self.initial_configurations - if not init_configs: - init_configs = [self.manipulator.random() for i in range(self.population_size)] - return [PopulationMember(config) for config in init_configs] - - def lt(self, cfg_a, cfg_b): - """ - Return whether cfg_a has a better objective function score than cfg_b - - :param cfg_a: first configuration - :param cfg_b: second configuration - :return: True if cfg_a is better than cfg_b - """ - def config(cfg): - return self.driver.get_configuration(cfg) - return self.objective.lt(config(cfg_a), config(cfg_b)) - - def lte(self, cfg_a, cfg_b): - """ - Return whether cfg_a's objective function score is at least as good as cfg_b's - score - - :param cfg_a: first configuration - :param cfg_b: second configuration - :return: True if cfg_a is at least as good as cfg_b - """ - def config(cfg): - return self.driver.get_configuration(cfg) - return self.objective.lte(config(cfg_a), config(cfg_b)) - - def get_global_best_configuration(self): - """ - Return the current global best configuration in the search - - :return: the current global best configuration - """ - if (self.driver.best_result is not None and - self.driver.best_result.state == 'OK'): - return self.manipulator.copy(self.driver.best_result.configuration.data) - else: - return self.manipulator.random() - - def get_default_operator(self, param_type): - """ - Given a parameter type, return a dictionary with information about the - operator to be used for the parameter. 
The returned dictionary must contain - the following 3 key, value pairs - - 1. 'op_name' - the string name of the operator - 2. 'args' - an iterable of the non-configuration arguments in order - 3. 'kwargs' - a dictionary from any optional arguments to their values - - :return: a dictionary containing information about the operator to apply for the input parameter type - """ - return {'op_name': 'op1_nop', 'args': [], 'kwargs': {}} - - # HELPER METHODS FOR BUILDING OPERATOR MAP - @classmethod - def add_to_map(cls, operator_map, param_type, operator_name, *args, **kwargs): - """ - A helper method for adding parameter to operator mappings into the operator - map. - - :param operator_map: the operator map to add to - :param param_type: the parameter type to use the this operator on - :param operator_name: the string name of the operator method - :param *args: any non-configuration arguments to the operator - :param **kwargs: any keyword arguemnts for the operator - """ - if(isinstance(param_type, Parameter)): - ptype = type(param_type) - elif (type(param_type) == str): - ptype = reduce(getattr, param_type.split("."), sys.modules[__name__]) - else: - ptype = param_type; - - operator_map[ptype] = {'op_name': operator_name, 'args':args, 'kwargs':kwargs} - - - def main_generator(self): - """ - The primary body of the search technique. - Initializes an initial population and then yields configurations by applying - operators to get_parents. 
- """ - min_parents = self.minimum_number_of_parents(); - # convert a manipulator configuration to a db.models.Configuration - def get_driver_configuration(cfg): - return self.driver.get_configuration(cfg) - - # initialize the population - population = self.get_initial_population() - - # measure initial population - for p in population: - yield get_driver_configuration(p.config) - - while True: - # get parents - parents = self.get_parents(population) - if len(parents) < min_parents: - log.error("%s: Number of parents returned %d is less than the guaranteed" - + " minimum returned by minimum_number_of_parents() %d. ", - self.name, len(parents), min_parents) - # fail and let other techniques work forever - while True: - yield None - - params = self.select_parameters(self.manipulator.params) - config = self.get_new_config(parents, params) - yield get_driver_configuration(config) - - population = self.update_population(config, population) - - # safety check that population has all been tested - for p in population: - if not self.driver.has_results(get_driver_configuration(p.config)): - yield get_driver_configuration(p.config) - - def get_new_config(self, parents, params): - """ - Return a new configuration to test, given a list of parent configurations - This mutates the first parent - - :param parents: A list of parent configurations - :params: A list of parameters to operate on - :return: The mutated configuration (first parent) - """ - for param in params: - self.apply_operator(param, parents) #TODO - return parents[0] - - def apply_operator(self, param, parents): - """ - Apply the appropriate operator for param to parents. - If an operator takes less input configurations than the number of parents, - only the first parents are passed in. 
If operator takes more input configs - than minimum_number_of_parents, logs an error and doesn't do anything - """ - x = self.get_operator(type(param)) - - operator_name = x['op_name'] - if not self.is_valid_operator(type(param), operator_name): - # do nothing - return - - # operator is already in valid form and starts with op1, op2, op3, op4, or opn - num_parents_required = operator_name[2] - if num_parents_required == 'n': - args = parents[0] + [parents[1:]] - else: - num_parents_required = int(num_parents_required) - args = parents[:num_parents_required] - args.extend(x['args']) - - kwargs = x['kwargs'] - - getattr(param, operator_name)(*args, **kwargs) - - def get_operator(self, param_type): - if param_type in self.operator_map: - return self.operator_map[param_type] - return self.get_default_operator(param_type) - - def is_valid_operator(self, param_type, operator_name): - if not hasattr(param_type, operator_name): - log.error("%s: %s is not a valid operator for Parameter type %s", - self.name, operator_name, param_type.__name__) - return False - - if operator_name[:3] not in ['op1','op2','op3','op4','opn']: - log.error("%s: %s is not a valid operator for Parameter type %s", - self.name, operator_name, param_type.__name__) - return False - - num_parents_required = operator_name[2] - if num_parents_required == 'n': - return True - - num_parents_required = int(num_parents_required) - minimum_number_of_parents = self.minimum_number_of_parents() - - if num_parents_required > minimum_number_of_parents: - log.error("%s: %s for Parameter type %s requires more input configs " - + "than minimum number of parents, %d, produced by this technique", - self.name, operator_name, param_type.__name__, minimum_number_of_parents) - return False - - return True - - @classmethod - def generate_technique(cls, manipulator=None, *args, **kwargs): - """ - generate a composable technique with random operators - """ - from manipulator import composable_operators - # randomly select a 
composable technique to generate - t = cls(*args, **kwargs) - if manipulator is None: - return t - - paramset = set() - for p in manipulator.params: - paramset.add(type(p)) - - # add some random operator for each param - operator_map = {} - for param in paramset: - operators = composable_operators(param, t.minimum_number_of_parents()) - # TODO - sometimes use "default" operator (don't choose an operator? - # TODO - lower chance of picking op1_nop? - ComposableEvolutionaryTechnique.add_to_map(operator_map, param, random.choice(operators)) - - t.set_operator_map(operator_map) - t.use_default_generated_name() - return t - - -class RandomThreeParentsComposableTechnique(ComposableEvolutionaryTechnique): - """ - based on DifferentialEvolution - """ - - def __init__(self, cr = 0.9, must_mutate_count=1, information_sharing=1, *pargs, **kwargs): - super(RandomThreeParentsComposableTechnique, self).__init__(*pargs, **kwargs) - self.cr = cr - self.must_mutate_count = must_mutate_count - self.information_sharing = information_sharing - - @classmethod - def get_hyper_parameters(cls): - return ['population_size', 'cr', 'must_mutate_count', 'information_sharing'] - - def minimum_number_of_parents(self): - return 4 - - def get_parents(self, population): - self.use_f = random.random() - population.sort(key=_.timestamp) # sort population by timestamp - - # copy oldest - cfg = self.manipulator.copy(population[0].config) - - shuffled_population = map(_.config, population[1:]) - # mix in the global best configuration - shuffled_population += ([self.get_global_best_configuration()] - * self.information_sharing) - random.shuffle(shuffled_population) - - # return oldest configuration +_3 other configurations - return [cfg] + shuffled_population[0:3] - - def update_population(self, config, population): - # replace the oldest configuration if the new one is better. 
- population.sort(key=_.timestamp) - if self.lt(config, population[0].config): - population[0].config = config - - # mark that oldest configuration is updated - population[0].touch() - - return population - - def select_parameters(self, params): - """ - randomly select a subset of parameters to operate on - """ - random.shuffle(params) - ret_list = params[:self.must_mutate_count] - for param in params[self.must_mutate_count:]: - if random.random() < self.cr: - ret_list.append(param) - return ret_list - - def get_default_operator(self, param_type): - return {'op_name': 'op4_set_linear', 'args': [1.0, self.use_f, -self.use_f], 'kwargs': {}} - -class GreedyComposableTechnique(ComposableEvolutionaryTechnique): - """ - Always mixes in global best as parents - """ - def __init__(self, - mutation_rate = 0.1, - must_mutate_count = 1, - population_size = 10, - *pargs, **kwargs): - super(GreedyComposableTechnique, self).__init__(*pargs, **kwargs) - self.mutation_rate = mutation_rate - self.must_mutate_count = must_mutate_count - self.population_size = population_size - - - @classmethod - def get_hyper_parameters(cls): - return ['mutation_rate', 'must_mutate_count'] - - def minimum_number_of_parents(self): - # specify that we will return at least 4 cfgs from get_parents - # this maximizes # of operators we can use - return 4 - - def get_parents(self, population): - population.sort(key=_.timestamp) # sort population by timestamp - - # get a 50-50 mix of base and best cfgs as many operators do nothing given identical input cfgs - cfg = self.manipulator.copy(population[0].config) - # duplicate to get a total of 4 configurations to fulfill the promise in minimum_number_of_parents - cfgs = [self.get_global_best_configuration(), cfg]*2 - # return a random 50-50 mix of the current configuration and global best to pass into operators - random.shuffle(cfgs) - return cfgs - - def update_population(self, config, population): - # replace the oldest configuration if the new one is better. 
- population.sort(key=_.timestamp) - if self.lt(config, population[0].config): - population[0].config = config - - # mark that oldest configuration is updated - population[0].touch() - - return population - - def select_parameters(self, params): - random.shuffle(params) - ret_list = params[:self.must_mutate_count] - for param in params[self.must_mutate_count:]: - if random.random() < self.mutation_rate: - ret_list.append(param) - return ret_list - - def get_default_operator(self, param_type): - return {'op_name': 'op1_randomize', 'args': [], 'kwargs':{}} - - -register(RandomThreeParentsComposableTechnique(name='ComposableDiffEvolution', - population_size=30)) -register_generator(RandomThreeParentsComposableTechnique) -register_generator(GreedyComposableTechnique) - - -op_map = {} -ComposableEvolutionaryTechnique.add_to_map(op_map, - PermutationParameter, - "op3_cross", xchoice='op3_cross_CX') -ComposableEvolutionaryTechnique.add_to_map(op_map, - "FloatArray", - "op3_cross", strength=0.4) -register(RandomThreeParentsComposableTechnique(name='ComposableDiffEvolutionCX', - operator_map=op_map, - population_size=30)) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/differentialevolution.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/differentialevolution.py deleted file mode 100644 index cecffc460c5cdbef184fc244a70f9a6af251bddd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/differentialevolution.py +++ /dev/null @@ -1,148 +0,0 @@ -import random -import time -import logging -from fn import _ -from technique import register -from technique import SearchTechnique - -log = logging.getLogger(__name__) -log.setLevel(logging.WARNING) - - -class PopulationMember(object): - def __init__(self, config, submitted=True): - self.config = config - self.submitted = submitted - self.timestamp = time.time() - self.candidate_replacement = None - - def touch(self): - 
self.timestamp = time.time() - - -class DifferentialEvolution(SearchTechnique): - """ - based on http://cci.lbl.gov/cctbx_sources/scitbx/differential_evolution.py - """ - - def __init__(self, - population_size=30, - cr=0.9, # crossover rate - n_cross=1, # force at least 1 to crossover - information_sharing=1, # number token sharing pop members - duplicate_retries=5, # how many times to retry on duplicate - *pargs, **kwargs): - - self.population_size = population_size - self.cr = cr - self.n_cross = n_cross - self.information_sharing = information_sharing - self.population = None - self.duplicate_retries = duplicate_retries - self.limit = None - super(DifferentialEvolution, self).__init__(*pargs, **kwargs) - - @classmethod - def get_hyper_parameters(cls): - return ['population_size', 'cr', 'n_cross', 'information_sharing'] - - def initial_population(self): - self.population = [PopulationMember( - self.driver.get_configuration( - self.manipulator.random()), submitted=False) - for z in xrange(self.population_size)] - - def oldest_pop_member(self): - # since tests are run in parallel, exclude things with a replacement pending - pop_without_replacements = filter(lambda x: x.candidate_replacement is None, - self.population) - if not pop_without_replacements: - # everything has a pending replacement - return None - pop_without_replacements.sort(key=_.timestamp) - return pop_without_replacements[0] - - def desired_configuration(self): - """ - return a cfg that we should test, - """ - if not self.population: - # first time called - self.initial_population() - - # make sure initial population is completely submitted - for p in self.population: - if not p.submitted: - p.submitted = True - if p is self.population[-1]: - log.info('initial population testing done') - return p.config - - # pp is member of population to be replaced - oldest_pop_member = self.oldest_pop_member() - if not oldest_pop_member: - return None - - config = None - for retry in 
xrange(self.duplicate_retries): - config = self.driver.get_configuration( - self.create_new_configuration(oldest_pop_member)) - if not self.driver.has_results(config): - break - # new configuration would have been a duplicate, try again - - oldest_pop_member.touch() # move to back of the line for next replacement - oldest_pop_member.candidate_replacement = config - self.limit = self.driver.objective.limit_from_config( - oldest_pop_member.config) - return oldest_pop_member.candidate_replacement - - def create_new_configuration(self, parent_pop_member): - cfg = self.manipulator.copy(parent_pop_member.config.data) - cfg_params = self.manipulator.proxy(cfg) - - # pick 3 random parents, not pp - shuffled_pop = list(set(self.population) - set([parent_pop_member])) - - # share information with other techniques - if self.driver.best_result: - shuffled_pop += ([PopulationMember(self.driver.best_result.configuration)] - * self.information_sharing) - - random.shuffle(shuffled_pop) - x1, x2, x3 = map(_.config.data, shuffled_pop[0:3]) - - use_f = random.random() / 2.0 + 0.5 - - params = self.manipulator.param_names(cfg, x1, x2, x3) - random.shuffle(params) - for i, k in enumerate(params): - if i < self.n_cross or random.random() < self.cr: - # cfg = x1 + use_f*(x2 - x3) - cfg_params[k].op4_set_linear(x1, x2, x3, 1.0, use_f, -use_f) - - return cfg - - def handle_requested_result(self, result): - """called when new results are added""" - for p in self.population: - if p.candidate_replacement == result.configuration: - if self.objective.lt(p.candidate_replacement, p.config): - # candidate replacement was better, replace it! 
- p.config = p.candidate_replacement - log.info('better point') - p.candidate_replacement = None - - -class DifferentialEvolutionAlt(DifferentialEvolution): - def __init__(self, cr=0.2, **kwargs): - kwargs['cr'] = cr - super(DifferentialEvolutionAlt, self).__init__(**kwargs) - - -register(DifferentialEvolution()) -register(DifferentialEvolutionAlt()) -register(DifferentialEvolution(population_size=100, cr=0.2, - name='DifferentialEvolution_20_100')) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/driver.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/driver.py deleted file mode 100644 index 7924e36e6fbc772e375ce344e8de919f66e8c6b4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/driver.py +++ /dev/null @@ -1,301 +0,0 @@ -import argparse -import copy -import logging -import os -import sys - -from datetime import datetime -from fn import _ -from opentuner.driverbase import DriverBase -from opentuner.resultsdb.models import Configuration -from opentuner.resultsdb.models import DesiredResult -from opentuner.resultsdb.models import Result -from opentuner.resultsdb.models import BanditInfo -from opentuner.resultsdb.models import BanditSubTechnique -from opentuner.search import plugin -from opentuner.search import technique -from opentuner.search.bandittechniques import AUCBanditMetaTechnique - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--test-limit', type=int, default=5000, - help='stop tuning after given tests count') -argparser.add_argument('--stop-after', type=float, - help='stop tuning after given seconds') -argparser.add_argument('--parallelism', type=int, default=8, - help='how many tests to support at once') -argparser.add_argument('--pipelining', type=int, default=0, - help='how long a delay (in generations) before results are available') 
-argparser.add_argument('--bail-threshold', type=int, default=5000, - help='abort if no requests have been made in X generations') -argparser.add_argument('--no-dups', action='store_true', - help='don\'t print out warnings for duplicate requests') -argparser.add_argument('--seed-configuration', action='append', default=[], - metavar='FILENAME', help=""" - Start search at a given configuration. Can be - specified multiple times. Configurations are loaded - with ConfigurationManipulator.load_from_file() - and file format is detected from extension.""") - - -class SearchDriver(DriverBase): - """ - controls the search process managing root_technique and creating - DesiredResults - """ - - def __init__(self, manipulator, extra_seeds=None, extra_criteria=None, **kwargs): - super(SearchDriver, self).__init__(**kwargs) - if extra_seeds is None: - extra_seeds = [] - self.manipulator = manipulator - self.wait_for_results = self.tuning_run_main.results_wait - self.commit = self.tuning_run_main.commit - self.extra_criteria = extra_criteria - - self.generation = 0 - self.test_count = 0 - self.plugins = plugin.get_enabled(self.args) - self.pending_result_callbacks = list() # (DesiredResult, function) tuples - # deepcopy is required to have multiple tuning runs in a single process - if self.args.list_techniques: - techniques, generators = technique.all_techniques() - for t in techniques: - print t.name - sys.exit(0) - - if self.args.generate_bandit_technique: - # generate a bandit - self.root_technique = AUCBanditMetaTechnique.generate_technique(manipulator) - else: - self.root_technique = copy.deepcopy(technique.get_root(self.args)) - - if isinstance(self.root_technique, AUCBanditMetaTechnique): - self.session.flush() - info = BanditInfo(tuning_run=self.tuning_run, - c=self.root_technique.bandit.C, - window=self.root_technique.bandit.window,) - self.session.add(info) - for t in self.root_technique.techniques: - subtechnique = BanditSubTechnique(bandit_info=info, - name=t.name) - 
self.session.add(subtechnique) - - - self.objective.set_driver(self) - self.pending_config_ids = set() - self.best_result = None - self.new_results = [] - - for t in self.plugins: - t.set_driver(self) - self.root_technique.set_driver(self) - self.seed_cfgs = list(extra_seeds) - for cfg_filename in reversed(self.args.seed_configuration): - if os.path.exists(cfg_filename): - self.seed_cfgs.append(manipulator.load_from_file(cfg_filename)) - else: - log.error('no such file for --seed-configuration %s', cfg_filename) - - self.plugins.sort(key=_.priority) - - def add_plugin(self, p): - if p in self.plugins: - return - self.plugins.append(p) - self.plugins.sort(key=_.priority) - p.set_driver(self) - - def convergence_criteria(self): - """returns true if the tuning process should stop""" - if self.args.stop_after: - elapsed = (datetime.now() - self.tuning_run.start_date) - try: - elapsed = elapsed.total_seconds() - except: # python 2.6 - elapsed = elapsed.days * 86400 + elapsed.seconds - if elapsed > self.args.stop_after: - return True - if self.test_count > self.args.test_limit: - return True - if self.extra_criteria: - if self.extra_criteria(self.new_results): - return True - return False - - def register_result_callback(self, desired_result, callback): - if desired_result.result is not None: - callback(desired_result.result) - else: - self.pending_result_callbacks.append((desired_result, callback)) - - def result_callbacks(self): - pending = self.pending_result_callbacks - self.pending_result_callbacks = list() - for dr, callback in pending: - if dr.result is not None: - callback(dr.result) - continue - elif self.generation - dr.generation > self.args.pipelining: - # see if we can find a result - results = self.results_query(config=dr.configuration).all() - log.warning("Result callback %d (requestor=%s) pending for " - "%d generations %d results available", - dr.id, dr.requestor, self.generation - dr.generation, - len(results)) - if len(results): - dr.result = 
results[0] - callback(dr.result) - continue - # try again later - self.pending_result_callbacks.append((dr, callback)) - - def has_results(self, config): - return self.results_query(config=config).count() > 0 - - def run_generation_techniques(self): - tests_this_generation = 0 - self.plugin_proxy.before_techniques() - for z in xrange(self.args.parallelism): - - if self.seed_cfgs: - config = self.get_configuration(self.seed_cfgs.pop()) - dr = DesiredResult(configuration=config, - requestor='seed', - generation=self.generation, - request_date=datetime.now(), - tuning_run=self.tuning_run) - else: - dr = self.root_technique.desired_result() - if dr is None or dr is False: - log.debug("no desired result, skipping to testing phase") - break - self.session.flush() # populate configuration_id - duplicates = (self.session.query(DesiredResult) - .filter_by(tuning_run=self.tuning_run, - configuration_id=dr.configuration_id) - .filter(DesiredResult.id != dr.id) - .order_by(DesiredResult.request_date) - .limit(1).all()) - self.session.add(dr) - if len(duplicates): - if not self.args.no_dups: - log.warning("duplicate configuration request #%d %s/%s %s", - self.test_count, - dr.requestor, - duplicates[0].requestor, - 'OLD' if duplicates[0].result else 'PENDING') - self.session.flush() - desired_result_id = dr.id - - def callback(result): - dr = self.session.query(DesiredResult).get(desired_result_id) - dr.result = result - dr.state = 'COMPLETE' - dr.start_date = datetime.now() - - self.register_result_callback(duplicates[0], callback) - else: - log.debug("desired result id=%d, cfg=%d", dr.id, dr.configuration_id) - dr.state = 'REQUESTED' - self.test_count += 1 - tests_this_generation += 1 - self.plugin_proxy.after_techniques() - return tests_this_generation - - def process_new_results(self): - self.new_results = [] - for result in (self.results_query() - .filter_by(was_new_best=None) - .order_by(Result.collection_date)): - self.plugin_proxy.on_result(result) - 
self.new_results.append(result) - if self.best_result is None: - self.best_result = result - result.was_new_best = True - elif self.objective.lt(result, self.best_result): - self.best_result = result - result.was_new_best = True - self.plugin_proxy.on_new_best_result(result) - else: - result.was_new_best = False - self.result_callbacks() - - def run_generation_results(self, offset=0): - self.commit() - self.plugin_proxy.before_results_wait() - self.wait_for_results(self.generation + offset) - self.plugin_proxy.after_results_wait() - self.process_new_results() - - @property - def plugin_proxy(self): - """ - forward any method calls on the returned object to all plugins - """ - plugins = self.plugins - - class PluginProxy(object): - def __getattr__(self, method_name): - def plugin_method_proxy(*args, **kwargs): - rv = [] - for plugin in plugins: - rv.append(getattr(plugin, method_name)(*args, **kwargs)) - return filter(lambda x: x is not None, rv) - - return plugin_method_proxy - - return PluginProxy() - - def get_configuration(self, cfg): - """called by SearchTechniques to create Configuration objects""" - self.manipulator.normalize(cfg) - hashv = self.manipulator.hash_config(cfg) - config = Configuration.get(self.session,self.program, hashv, cfg) - return config - - def main(self): - self.plugin_proxy.set_driver(self) - self.plugin_proxy.before_main() - - no_tests_generations = 0 - - # prime pipeline with tests - for z in xrange(self.args.pipelining): - self.run_generation_techniques() - self.generation += 1 - - while not self.convergence_criteria(): - if self.run_generation_techniques() > 0: - no_tests_generations = 0 - elif no_tests_generations <= self.args.bail_threshold: - no_tests_generations += 1 - else: - break - self.run_generation_results(offset=-self.args.pipelining) - self.generation += 1 - - self.plugin_proxy.after_main() - - def external_main_begin(self): - self.plugin_proxy.set_driver(self) - self.plugin_proxy.before_main() - - def 
external_main_generation(self): - if self.generation > 0: - self.plugin_proxy.after_results_wait() - self.process_new_results() - self.run_generation_techniques() - self.commit() - self.plugin_proxy.before_results_wait() - - def external_main_end(self): - self.plugin_proxy.after_main() - - - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/evolutionarytechniques.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/evolutionarytechniques.py deleted file mode 100644 index e663ac1345cfbd0823df2231fc3e8040298059f9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/evolutionarytechniques.py +++ /dev/null @@ -1,153 +0,0 @@ -import abc -import copy -import random -from technique import SearchTechnique -from opentuner.search import technique - -class EvolutionaryTechnique(SearchTechnique): - def __init__(self, - mutation_rate = 0.1, - crossover_rate = 0.0, - must_mutate_count = 1, - *pargs, **kwargs): - super(EvolutionaryTechnique, self).__init__(*pargs, **kwargs) - self.mutation_rate = mutation_rate - self.crossover_rate = crossover_rate - self.must_mutate_count = must_mutate_count - - @classmethod - def get_hyper_parameters(cls): - return ['mutation_rate', 'crossover_rate', 'must_mutate_count'] - - - def desired_configuration(self): - """ - return a (cfg, priority) that we should test, - through random mutation and crossover - """ - #TODO: set limit value - - parents = self.selection() - parents = map(copy.deepcopy, parents) - parent_hashes = map(self.manipulator.hash_config, parents) - - if len(parents) > 1: - cfg = self.crossover(parents) - else: - cfg = parents[0] - - for z in xrange(10): #retries - self.mutation(cfg) - if self.manipulator.hash_config(cfg) in parent_hashes: - continue # try again - return cfg - - def mutation(self, cfg): - """ - mutate cfg in place - """ - params = self.manipulator.parameters(cfg) - random.shuffle(params) - for param 
in params[:self.must_mutate_count]: - self.mutate_param(cfg, param) - for param in params[self.must_mutate_count:]: - if random.random() < self.mutation_rate: - self.mutate_param(cfg, param) - - def mutate_param(self, cfg, param): - """ - mutate single parameter of cfg in place - """ - param.op1_randomize(cfg) - - def crossover(self): - raise Exception('Not implemented') - - def selection(self): - """return a list of parent configurations to use""" - if random.random() < self.crossover_rate: - return [self.select(), - self.select()] - else: - return [self.select()] - - @abc.abstractmethod - def select(self): - """return a single random parent configuration""" - return None - -class GreedySelectionMixin(object): - """ - EvolutionaryTechnique mixin for greedily selecting the best known - configuration - """ - def select(self): - """return a single random parent configuration""" - if (self.driver.best_result is not None and - self.driver.best_result.state == 'OK'): - return self.driver.best_result.configuration.data - else: - return self.manipulator.random() - -class NormalMutationMixin(object): - """ - Mutate primitive parameters according to normal distribution - """ - - def __init__(self, sigma = 0.1, *pargs, **kwargs): - super(NormalMutationMixin, self).__init__(*pargs, **kwargs) - self.sigma = sigma - - def mutate_param(self, cfg, param): - """ - mutate single parameter of cfg in place - """ - if param.is_primitive(): - param.op1_normal_mutation(cfg, self.sigma) - else: - random.choice(param.manipulators(cfg))(cfg) - - -class CrossoverMixin(object): - def __init__(self, crossover, *pargs, **kwargs): - super(CrossoverMixin, self).__init__(*pargs, **kwargs) - self.crossover_op = crossover - self.name = 'ga-'+crossover.replace("op3_cross_","") - - def crossover(self, cfgs): - """ - Crossover the first permtation parameter, if found, of two parents and - return one offspring cfg - """ - cfg1, cfg2, = cfgs - new = self.manipulator.copy(cfg1) - params = 
self.manipulator.parameters(cfg1) - for param in params: - if param.is_permutation() and param.size>6: - getattr(param, self.crossover_op)(new, cfg1, cfg2, d=param.size/3) - return new - - -class UniformGreedyMutation(GreedySelectionMixin, EvolutionaryTechnique): - pass - -class NormalGreedyMutation(NormalMutationMixin, GreedySelectionMixin, EvolutionaryTechnique): - pass - -class GA(CrossoverMixin, UniformGreedyMutation): - pass - -technique.register(GA(crossover = 'op3_cross_OX3', mutation_rate=0.10, crossover_rate=0.8)) -technique.register(GA(crossover = 'op3_cross_OX1', mutation_rate=0.10,crossover_rate=0.8)) -technique.register(GA(crossover = 'op3_cross_PX', mutation_rate=0.10, crossover_rate=0.8)) -technique.register(GA(crossover = 'op3_cross_CX', mutation_rate=0.10, crossover_rate=0.8)) -technique.register(GA(crossover = 'op3_cross_PMX', mutation_rate=0.10, crossover_rate=0.8)) -technique.register(UniformGreedyMutation(name='ga-base', mutation_rate=0.10)) - -technique.register(UniformGreedyMutation(name='UniformGreedyMutation05', mutation_rate=0.05)) -technique.register(UniformGreedyMutation(name='UniformGreedyMutation10', mutation_rate=0.10)) -technique.register(UniformGreedyMutation(name='UniformGreedyMutation20', mutation_rate=0.20)) -technique.register(NormalGreedyMutation(name='NormalGreedyMutation05', mutation_rate=0.05)) -technique.register(NormalGreedyMutation(name='NormalGreedyMutation10', mutation_rate=0.10)) -technique.register(NormalGreedyMutation(name='NormalGreedyMutation20', mutation_rate=0.20)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/globalGA.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/globalGA.py deleted file mode 100644 index e9b1f711746bbd42d0fb6e7ca3972d467c703e66..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/globalGA.py +++ /dev/null @@ -1,125 +0,0 @@ -import abc -import copy -import random -from 
technique import SearchTechnique -from opentuner.search import technique - -class GlobalEvolutionaryTechnique(SearchTechnique): - def __init__(self, - mutation_rate = 0.1, - crossover_rate = 0.0, - must_mutate_count = 1, - crossover_strength = 0.1, - *pargs, **kwargs): - super(GlobalEvolutionaryTechnique, self).__init__(*pargs, **kwargs) - self.mutation_rate = mutation_rate - self.crossover_rate = crossover_rate - self.must_mutate_count = must_mutate_count - self.crossover_strength = crossover_strength - - @classmethod - def get_hyper_parameters(cls): - return ['mutation_rate', 'crossover_rate', 'must_mutate_count', 'crossover_strength'] - - def desired_configuration(self): - """ - return a (cfg, priority) that we should test, - through random mutation and crossover - """ - #TODO: set limit value - - parents = self.selection() - parents = map(copy.deepcopy, parents) - parent_hashes = map(self.manipulator.hash_config, parents) - - if len(parents) > 1: - cfg = self.crossover(parents) - else: - cfg = parents[0] - - for z in xrange(10): #retries - self.mutation(cfg) - if self.manipulator.hash_config(cfg) in parent_hashes: - continue # try again - return cfg - - def mutation(self, cfg): - """ - mutate cfg in place - """ - params = self.manipulator.parameters(cfg) - random.shuffle(params) - for param in params[:self.must_mutate_count]: - self.mutate_param(cfg, param) - for param in params[self.must_mutate_count:]: - if random.random() < self.mutation_rate: - self.mutate_param(cfg, param) - - def mutate_param(self, cfg, param): - """ - mutate single parameter of cfg in place - """ - param.op1_randomize(cfg) - - def crossover(self, cfgs): - cfg1, cfg2, = cfgs - new = self.manipulator.copy(cfg1) - params = self.manipulator.parameters(cfg1) - random.shuffle(params) - d = int(self.crossover_strength*len(params)) - for param in params[:d]: - param.set_value(new, param.get_value(cfg2)) - return new - - def selection(self): - """return a list of parent configurations to use""" - 
if random.random() < self.crossover_rate: - return [self.select(), - self.select()] - else: - return [self.select()] - - @abc.abstractmethod - def select(self): - """return a single random parent configuration""" - return None - -class GreedySelectionMixin(object): - """ - EvolutionaryTechnique mixin for greedily selecting the best known - configuration - """ - def select(self): - """return a single random parent configuration""" - if (self.driver.best_result is not None and - self.driver.best_result.state == 'OK'): - return self.driver.best_result.configuration.data - else: - return self.manipulator.random() - -class NormalMutationMixin(object): - """ - Mutate primitive parameters according to normal distribution - """ - - def __init__(self, sigma = 0.1, *pargs, **kwargs): - super(NormalMutationMixin, self).__init__(*pargs, **kwargs) - self.sigma = sigma - - def mutate_param(self, cfg, param): - """ - mutate single parameter of cfg in place - """ - if param.is_primitive(): - param.op1_normal_mutation(cfg, self.sigma) - else: - random.choice(param.manipulators(cfg))(cfg) - - -class UniformGreedyMutation(GreedySelectionMixin, GlobalEvolutionaryTechnique): - pass - -class NormalGreedyMutation(NormalMutationMixin, GreedySelectionMixin, GlobalEvolutionaryTechnique): - pass - -technique.register(NormalGreedyMutation( crossover_rate=0.5, crossover_strength=0.2, name='GGA')) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/manipulator.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/manipulator.py deleted file mode 100755 index decd476bf37ec2c12d2578b9b8266e5f8c705b12..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/manipulator.py +++ /dev/null @@ -1,1853 +0,0 @@ -# vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab autoindent smarttab -import abc -import collections -import copy -import hashlib -import json -import logging -import math -import os -import 
pickle -import random -from fn import _ -import argparse -from datetime import datetime -import numpy -import inspect -import sys - -log = logging.getLogger(__name__) -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--list-params', '-lp', - help='list available parameter classes') - - -class ConfigurationManipulatorBase(object): - """ - abstract interface for objects used by search techniques to mutate - configurations - """ - __metaclass__ = abc.ABCMeta - - # List of file formats, which can be extended by subclasses. Used in - # write_to_file() and load_from_file(). Objects in list must define - # load(fd) and dump(cfg, fd). - FILE_FORMATS = {'default': json, 'json': json, - 'pickle': pickle, 'pk': pickle} - - def validate(self, config): - """is the given config valid???""" - return all(map(_.validate(config), self.parameters(config))) - - def normalize(self, config): - """mutate config into canonical form""" - for param in self.parameters(config): - param.normalize(config) - - def set_search_driver(self, search_driver): - """called exactly once during setup""" - pass - - def copy(self, config): - """produce copy of config""" - return copy.deepcopy(config) - - def parameters_dict(self, config): - """convert self.parameters() to a dictionary by name""" - return dict([(p.name, p) for p in self.parameters(config)]) - - def param_names(self, *args): - """return union of parameter names in args""" - return sorted(reduce(set.union, - [set(map(_.name, self.parameters(cfg))) - for cfg in args])) - - def linear_config(self, a, cfg_a, b, cfg_b, c, cfg_c): - """return a configuration that is a linear combination of 3 other configs""" - dst = self.copy(cfg_a) - dst_params = self.proxy(dst) - for k in self.param_names(dst, cfg_a, cfg_b, cfg_c): - dst_params[k].op4_set_linear(cfg_a, cfg_b, cfg_c, a, b, c) - return dst - - def _get_serializer(self, filename, format=None): - """ - Extract the correct file format serializer from self.FILE_FORMATS. 
- Guess the format by extension if one is not given. - """ - if format is None: - format = os.path.splitext(filename)[1].lower().replace('.', '') - if format not in self.FILE_FORMATS: - serializer = self.FILE_FORMATS['default'] - if len(self.FILE_FORMATS) > 1: - log.warning('Unknown file format "%s", using "%s" instead', format, - serializer.__name__) - else: - serializer = self.FILE_FORMATS[format] - return serializer - - def save_to_file(self, cfg, filename, format=None): - """ - Write cfg to filename. Guess the format by extension if one is not given. - """ - with open(filename, 'a+') as fd: - self._get_serializer(filename, format).dump(cfg, fd) - - def load_from_file(self, filename, format=None): - """ - Read cfg from filename. Guess the format by extension if one is not given. - """ - with open(filename, 'rb') as fd: - return self._get_serializer(filename, format).load(fd) - - def proxy(self, cfg): - return ManipulatorProxy(self, cfg) - - @abc.abstractmethod - def random(self): - """produce a random initial configuration""" - return - - @abc.abstractmethod - def parameters(self, config): - """return a list of of Parameter objects""" - return list() - - @abc.abstractmethod - def hash_config(self, config): - """produce unique hash value for the given config""" - return - - -class ConfigurationManipulator(ConfigurationManipulatorBase): - """ - a configuration manipulator using a fixed set of parameters and storing - configs in a dict-like object - """ - - def __init__(self, params=None, config_type=dict, seed_config=None, **kwargs): - if params is None: - params = [] - self.params = list(params) - self.config_type = config_type - self.search_driver = None - self._seed_config = seed_config - super(ConfigurationManipulator, self).__init__(**kwargs) - for p in self.params: - p.parent = self - - def add_parameter(self, p): - p.set_parent(self) - self.params.append(p) - - #TODO sub parameters should be recursed on - # not currently an issue since no doubly-nested 
sub-parameters - sub_params = p.sub_parameters() - for sp in sub_params: - sp.set_parent(p) - self.params.extend(sub_params) - - def set_search_driver(self, search_driver): - self.search_driver = search_driver - - def seed_config(self): - """produce a fixed seed configuration""" - if self._seed_config: - cfg = copy.deepcopy(self._seed_config) - else: - cfg = self.config_type() - for p in self.params: - if not isinstance(p.name, str) or '/' not in p.name: - cfg[p.name] = p.seed_value() - return cfg - - def random(self): - """produce a random configuration""" - cfg = self.seed_config() - for p in self.parameters(cfg): - p.op1_randomize(cfg) - return cfg - - def parameters(self, config): - """return a list of Parameter objects""" - if type(config) is not self.config_type: - log.error("wrong type, expected %s got %s", - str(self.config_type), - str(type(config))) - raise TypeError() - return self.params - - def parameters_to_json(self): - """ - output information about the parameters in this manipulator in json format: - [ConfigurationManipulator,{pinfo:count,pinfo:count ...}] - where pinfo has a similar form to describe the parameter's sub-parameters: - [param_name,{pinfo:count,pinfo:count ...}] - """ - def param_info_to_json(param, sub_parameters): - """ - recursively output information about a parameter and its subparameters in a json format: - - [parameter_name, {subparam_info:count,subparam_info:count,...}] - or if no subparams - [parameter_name,{}] - - where subparam_info are sorted alphabetically. 
Note we can't directly use json since - sets/dictionaries aren't always ordered by key - """ - sub_parameter_counts = {} - # build the string - if isinstance(param, str): - param_name = param - else: - param_name = param.__class__.__name__ - out = ['[', param_name, ',{'] - - if len(sub_parameters) > 0: - # count sub params - for sp in sub_parameters: - spout = param_info_to_json(sp, sp.sub_parameters()) - sub_parameter_counts[spout] = sub_parameter_counts.get(spout, 0) + 1 - # add the count map in sorted order - for sp in sorted(sub_parameter_counts): - out.append(sp) - out.append(':') - out.append(str(sub_parameter_counts[sp])) - out.append(',') - out.pop() # remove trailing comma - - out.append('}]') - return ''.join(out) - - # filter out subparameters to avoid double counting - params = [p for p in self.params if p.parent is self] - return param_info_to_json(self, params) - - def hash_config(self, config): - """produce unique hash value for the given config""" - m = hashlib.sha256() - params = list(self.parameters(config)) - params.sort(key=_.name) - for i, p in enumerate(params): - m.update(str(p.name)) - m.update(p.hash_value(config)) - m.update(str(i)) - m.update("|") - return m.hexdigest() - - def search_space_size(self): - """estimate the size of the search space, not precise""" - return reduce(_ * _, [x.search_space_size() for x in self.params]) - - def difference(self, cfg1, cfg2): - cfg = self.copy(cfg1) - for param in self.parameters(cfg1): - if param.is_primitive(cfg1): - # TODO: check range - param.set_value(cfg, param.get_value(cfg1) - param.get_value(cfg2)) - else: - pass - return cfg - - def applySVs(self, cfg, sv_map, args, kwargs): - """ - Apply operators to each parameter according to given map. Updates cfg. - Parameters with no operators specified are not updated. 
- cfg: configuration data - sv_map: python dict that maps string parameter name to class method name - arg_map: python dict that maps string parameter name to class method - arguments - """ - # TODO: check consistency between sv_map and cfg - param_dict = self.parameters_dict(cfg) - for pname in self.param_names(cfg): - param = param_dict[pname] - getattr(param, sv_map[pname])(cfg, *args[pname], **kwargs[pname]) - - -class Parameter(object): - """ - abstract base class for parameters in a ConfigurationManipulator - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, name): - self.name = name - self.parent = None - super(Parameter, self).__init__() - - def _to_storage_type(self, val): - """hook to support transformation applied while stored""" - return val - - def _from_storage_type(self, sval): - """hook to support transformation applied while stored""" - return sval - - def _read_node(self, config): - """hook to support different storage structures""" - node = config - if not isinstance(self.name, str): - return node, self.name - name_parts = self.name.split('/') - for part in name_parts[:-1]: - if isinstance(node, list): - part = int(part) - node = node[part] - part = name_parts[-1] - if isinstance(node, list): - part = int(part) - return node, part - - def _get(self, config): - """hook to support different storage structures""" - node, part = self._read_node(config) - return self._from_storage_type(node[part]) - - def _set(self, config, v): - """hook to support different storage structures""" - node, part = self._read_node(config) - node[part] = self._to_storage_type(v) - - def set_parent(self, manipulator): - self.parent = manipulator - - def validate(self, config): - """is the given config valid???""" - return True - - def is_primitive(self, ignored=None): - return isinstance(self, PrimitiveParameter) - - def is_permutation(self, ignored=None): - return isinstance(self, PermutationParameter) - - def manipulators(self, config): - """ - a list of 
manipulator functions to change this value in the config - manipulators must be functions that take a config and change it in place - - default implementation just has op1_randomize as only operation - """ - return [self.op1_randomize] - - def normalize(self, config): - """ - mutate this parameter into a canonical form - """ - pass - - def sub_parameters(self): - """ - additional parameters added with this parameter - """ - return [] - - @abc.abstractmethod - def op1_randomize(self, cfg): - """ - Set this parameter's value in a configuration to a random value - - :param config: the configuration to be changed - """ - pass - - @abc.abstractmethod - def seed_value(self): - """some legal value of this parameter (for creating initial configs)""" - return - - @abc.abstractmethod - def copy_value(self, src, dst): - """copy the value of this parameter from src to dst config""" - pass - - @abc.abstractmethod - def same_value(self, cfg1, cfg2): - """test if cfg1 and cfg2 have the same value of this parameter""" - return - - @abc.abstractmethod - def hash_value(self, config): - """produce unique hash for this value in the config""" - return - - @abc.abstractmethod - def op4_set_linear(self, cfg, cfg_a, cfg_b, cfg_c, a, b, c): - """ - Sets the parameter value in a configuration to a linear combination of 3 - other configurations: :math:`a*cfg_a + b*cfg_b + c*cfg_c` - - :param cfg: the configuration to be changed - :param cfg_a: a parent configuration - :param cfg_b: a parent configuration - :param cfg_c: a parent configuration - :param a: weight for cfg_a - :param b: weight for cfg_b - :param c: weight for cfg_c - """ - pass - - def search_space_size(self): - return 1 - - def op1_nop(self, cfg): - """ - The 'null' operator. Does nothing. 
- - :param cfg: the configuration to be changed - """ - pass - - # Stochastic variators - def op3_swarm(self, cfg, cfg1, cfg2, c, c1, c2, *args, **kwargs): - """ - Stochastically 'move' the parameter value in a configuration towards those - in two parent configurations. This is done by calling :py:meth:`opn_stochastic_mix` - - :param cfg: the configuration to be changed - :param cfg1: a parent configuration - :param cfg2: a parent configuration - :param c: weight of original configuration - :param c1: weight for cfg1 - :param c2: weight for cfg2 - """ - # default to probabilistic treatment - self.opn_stochastic_mix(cfg, [cfg, cfg1, cfg2], [c, c1, c2]) - - def opn_stochastic_mix(self, cfg, cfgs, ratio, *args, **kwargs): - """ - Stochastically recombine a list of parent values into a single result. - - This randomly copies a value from a list of parents configurations according - to a list of weights. - - :param cfg: the configuration to be changed - :param cfgs: a list of parent configurations - :param ratio: a list of floats representing the weight of each configuration - in cfgs - - """ - assert len(cfgs) == len(ratio) - r = random.random() - c = numpy.array(ratio, dtype=float) / sum(ratio) - for i in range(len(c)): - if r < sum(c[:i + 1]): - self.copy_value(cfg, cfgs[i]) - break - - -class PrimitiveParameter(Parameter): - """ - An abstract interface implemented by parameters that represent a single - dimension in a cartesian space in a legal range - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, name, value_type=float, **kwargs): - self.value_type = value_type - super(PrimitiveParameter, self).__init__(name, **kwargs) - - def hash_value(self, config): - """produce unique hash for this value in the config""" - self.normalize(config) - return hashlib.sha256(repr(self.get_value(config))).hexdigest() - - def copy_value(self, src, dst): - """copy the value of this parameter from src to dst config""" - self.set_value(dst, self.get_value(src)) - - def 
same_value(self, cfg1, cfg2): - """test if cfg1 and cfg2 have the same value of this parameter""" - return self.get_value(cfg1) == self.get_value(cfg2) - - def is_integer_type(self): - """true if self.value_type can only represent integers""" - return self.value_type(0) == self.value_type(0.1) - - def get_unit_value(self, config): - """get_value scaled such that range is between 0.0 and 1.0""" - low, high = self.legal_range(config) - if self.is_integer_type(): - # account for rounding - low -= 0.4999 - high += 0.4999 - val = self.get_value(config) - if low < high: - return float(val - low) / float(high - low) - else: - if low > high: - log.warning('invalid range for parameter %s, %s to %s', - self.name, low, high) - # only a single legal value! - return 0.0 - - def set_unit_value(self, config, unit_value): - """set_value scaled such that range is between 0.0 and 1.0""" - assert 0.0 <= unit_value <= 1.0 - low, high = self.legal_range(config) - if self.is_integer_type(): - # account for rounding - low -= 0.4999 - high += 0.4999 - if low < high: - val = unit_value * float(high - low) + low - if self.is_integer_type(): - val = round(val) - val = max(low, min(val, high)) - self.set_value(config, self.value_type(val)) - - def op1_normal_mutation(self, cfg, sigma=0.1, *args, **kwargs): - """ - apply normally distributed noise to this parameter's value in a - configuration - - :param cfg: The configuration to be changed - :param sigma: the std. 
deviation of the normally distributed noise on a unit - scale - """ - v = self.get_unit_value(cfg) - v += random.normalvariate(0.0, sigma) - # handle boundary cases by reflecting off the edge - if v < 0.0: - v *= -1.0 - if v > 1.0: - v = 1.0 - (v % 1) - self.set_unit_value(cfg, v) - - def op4_set_linear(self, cfg, cfg_a, cfg_b, cfg_c, a, b, c): - """ - set the parameter value in a configuration to a linear combination of 3 - other configurations: :math:`a*cfg_a + b*cfg_b + c*cfg_c` - - :param cfg: The configuration to be changed - :param cfg_a: a parent configuration - :param cfg_b: a parent configuration - :param cfg_c: a parent configuration - :param a: weight for cfg_a - :param b: weight for cfg_b - :param c: weight for cfg_c - """ - va = self.get_unit_value(cfg_a) - vb = self.get_unit_value(cfg_b) - vc = self.get_unit_value(cfg_c) - v = a * va + b * vb + c * vc - v = max(0.0, min(v, 1.0)) - - self.set_unit_value(cfg, v) - - def manipulators(self, config): - """ - a list of manipulator functions to change this value in the config - manipulators must be functions that take a config and change it in place - - for primitive params default implementation is uniform random and normal - """ - return [self.op1_randomize, self.op1_normal_mutation] - - @abc.abstractmethod - def set_value(self, config, value): - """assign this value in the given configuration""" - pass - - @abc.abstractmethod - def get_value(self, config): - """retrieve this value from the given configuration""" - return 0 - - @abc.abstractmethod - def legal_range(self, config): - """return the legal range for this parameter, inclusive""" - return 0, 1 - - -class NumericParameter(PrimitiveParameter): - """ - A parameter representing a number with a minimum and maximum value - """ - def __init__(self, name, min_value, max_value, **kwargs): - """min/max are inclusive""" - assert min_value <= max_value - super(NumericParameter, self).__init__(name, **kwargs) - # after super call so self.value_type is 
initialized - self.min_value = self.value_type(min_value) - self.max_value = self.value_type(max_value) - - def seed_value(self): - """some legal value of this parameter (for creating initial configs)""" - return self.min_value - - def set_value(self, config, value): - assert value >= self.min_value - assert value <= self.max_value - self._set(config, value) - - def get_value(self, config): - return self._get(config) - - def legal_range(self, config): - return self.min_value, self.max_value - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration to a random value in its legal - range - - :param config: the configuration to be changed - """ - if self.is_integer_type(): - self.set_value(config, random.randint(*self.legal_range(config))) - else: - self.set_value(config, random.uniform(*self.legal_range(config))) - - def op1_scale(self, cfg, k): - """ - Scale this parameter's value in a configuration by a constant factor - - :param cfg: the configuration to be changed - :param k: the constant factor to scale the parameter value by - """ - v = self.get_value(cfg) * k - v = max(self.min_value, min(self.max_value, v)) - self.set_value(cfg, v) - - def op3_difference(self, cfg, cfg1, cfg2): - """ - Set this parameter's value in a configuration to the difference between this - parameter's values in 2 other configs (cfg2 - cfg1) - - :param cfg: the configuration to be changed - :param cfg1: The configuration whose parameter value is being subtracted - :param cfg2: The configuration whose parameter value is subtracted from - """ - v = self.get_value(cfg2) - self.get_value(cfg1) - v = max(self.min_value, min(self.max_value, v)) - self.set_value(cfg, v) - - def opn_sum(self, cfg, *cfgs): - """ - Set this parameter's value in a configuration to the sum of it's values in a - list of configurations - - :param cfg: the configuration to be changed - :param cfgs: a list of configurations to sum - """ - v = sum([self.get_value(c) for c in cfgs]) - v = 
max(self.min_value, min(self.max_value, v)) - self.set_value(cfg, v) - - def search_space_size(self): - if self.value_type is float: - return 2 ** 32 - else: - return self.max_value - self.min_value + 1 # inclusive range - - -class IntegerParameter(NumericParameter): - """ - A parameter representing an integer value in a legal range - """ - def __init__(self, name, min_value, max_value, **kwargs): - """min/max are inclusive""" - kwargs['value_type'] = int - super(IntegerParameter, self).__init__(name, min_value, max_value, **kwargs) - - def op3_swarm(self, cfg, cfg1, cfg2, c=1, c1=0.5, - c2=0.5, velocity=0, sigma=0.2, *args, **kwargs): - """ - Simulates a single update step in particle swarm optimization by updating - the current position and returning a new velocity. - - The new velocity is given by - - .. math:: c*velocity + r1*c1*(cfg1-cfg) + r2*c2*(cfg2-cfg) - - where r1 and r2 are random values between 0 and 1. - - The new current position is the new velocity with gaussian noise added. - - :param cfg: the configuration to be changed. Represents the current position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. 
Should be the global best - position - :param c: the weight of the current velocity - :param c1: weight of cfg1 - :param c2: weight of cfg2 - :param velocity: the old velocity - :param sigma: standard deviation of the gaussian noise, on a unit-scale - :return: the new velocity, a float - - """ - vmin, vmax = self.legal_range(cfg) - k = vmax - vmin - # calculate the new velocity - v = velocity * c + (self.get_value(cfg1) - self.get_value( - cfg)) * c1 * random.random() + (self.get_value( - cfg2) - self.get_value(cfg)) * c2 * random.random() - # Map velocity to continuous space with sigmoid - s = k / (1 + numpy.exp(-v)) + vmin - # Add Gaussian noise - p = random.gauss(s, sigma * k) - # Discretize and bound - p = int(min(vmax, max(round(p), vmin))) - self.set_value(cfg, p) - return v - - -class FloatParameter(NumericParameter): - def __init__(self, name, min_value, max_value, **kwargs): - """min/max are inclusive""" - kwargs['value_type'] = float - super(FloatParameter, self).__init__(name, min_value, max_value, **kwargs) - - def op3_swarm(self, cfg, cfg1, cfg2, c=1, c1=0.5, - c2=0.5, velocity=0, *args, **kwargs): - """ - - Simulates a single update step in particle swarm optimization by updating - the current position and returning a new velocity. - - The new velocity is given by - - .. math:: c*velocity + r1*c1*(cfg1-cfg) + r2*c2*(cfg2-cfg) - - where r1 and r2 are random values between 0 and 1 - - The new current position is the old current position offset by the new - velocity: - - :param cfg: the configuration to be changed. Represents the current position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. 
Should be the global best - position - :param c: the weight of the current velocity - :param c1: weight of cfg1 - :param c2: weight of cfg2 - :param velocity: the old velocity - :return: the new velocity, a float - - """ - vmin, vmax = self.legal_range(cfg) - v = velocity * c + (self.get_value(cfg1) - self.get_value( - cfg)) * c1 * random.random() + (self.get_value( - cfg2) - self.get_value(cfg)) * c2 * random.random() - p = self.get_value(cfg) + v - p = min(vmax, max(p, vmin)) - self.set_value(cfg, p) - return v - - -class ScaledNumericParameter(NumericParameter): - """ - A Parameter that is stored in configurations normally, but has a scaled - value when accessed using 'get_value'. - Because search techniques interact with Parameters through get_value, these - parameters are searched on a different scale (e.g. log scale). - """ - - @abc.abstractmethod - def _scale(self, v): - """ - called on a value when getting it from it's configuration. Transforms the - actual value to the scale it is searched on - """ - return v - - @abc.abstractmethod - def _unscale(self, v): - """ - called on a value when storing it. 
Transforms a value from it's search scale - to it's actual value - """ - return v - - def set_value(self, config, value): - NumericParameter.set_value(self, config, self._unscale(value)) - - def get_value(self, config): - return self._scale(NumericParameter.get_value(self, config)) - - def legal_range(self, config): - return map(self._scale, NumericParameter.legal_range(self, config)) - - -class LogIntegerParameter(ScaledNumericParameter, FloatParameter): - """ - an integer value that is searched on a log scale, but stored without scaling - """ - - def _scale(self, v): - return math.log(v + 1.0 - self.min_value, 2.0) - - def _unscale(self, v): - v = 2.0 ** v - 1.0 + self.min_value - v = int(round(v)) - return v - - def legal_range(self, config): - low, high = NumericParameter.legal_range(self, config) - # increase the bounds account for rounding - return self._scale(low - 0.4999), self._scale(high + 0.4999) - - -class LogFloatParameter(ScaledNumericParameter, FloatParameter): - """ - a float parameter that is searched on a log scale, but stored without scaling - """ - - def _scale(self, v): - return math.log(v + 1.0 - self.min_value, 2.0) - - def _unscale(self, v): - v = 2.0 ** v - 1.0 + self.min_value - return v - - -class PowerOfTwoParameter(ScaledNumericParameter, IntegerParameter): - """ - An integer power of two, with a min and max value. 
Searched by the exponent - """ - - def __init__(self, name, min_value, max_value, **kwargs): - kwargs['value_type'] = int - assert min_value >= 1 - assert math.log(min_value, 2) % 1 == 0 # must be power of 2 - assert math.log(max_value, 2) % 1 == 0 # must be power of 2 - super(PowerOfTwoParameter, self).__init__(name, min_value, max_value, - **kwargs) - - def _scale(self, v): - return int(math.log(v, 2)) - - def _unscale(self, v): - return 2 ** int(v) - - def legal_range(self, config): - return int(math.log(self.min_value, 2)), int(math.log(self.max_value, 2)) - - def search_space_size(self): - return int(math.log(super(PowerOfTwoParameter, self).search_space_size(), 2)) - - -################## - -class ComplexParameter(Parameter): - """ - A non-cartesian parameter that can't be manipulated directly, but has a set - of user defined manipulation functions - """ - - def copy_value(self, src, dst): - """copy the value of this parameter from src to dst config""" - self._set(dst, copy.deepcopy(self._get(src))) - - def same_value(self, cfg1, cfg2): - """test if cfg1 and cfg2 have the same value of this parameter""" - return self._get(cfg1) == self._get(cfg2) - - def hash_value(self, config): - """produce unique hash for this value in the config""" - self.normalize(config) - return hashlib.sha256(repr(self._get(config))).hexdigest() - - def get_value(self, config): - return self._get(config) - - def set_value(self, config, value): - self._set(config, value) - - def op4_set_linear(self, cfg, cfg_a, cfg_b, cfg_c, a, b, c): - """ - set this value to :math:`a*cfg_a + b*cfg_b + c*cfg_c` - - this operation is not possible in general with complex parameters but - we make an attempt to "fake" it for common use cases - - basically a call to randomize unless after normalization, - a = 1.0, b == -c, and cfg_b == cfg_c, in which case nothing is done - - :param cfg: the configuration to be changed - :param cfg_a: a parent configuration - :param cfg_b: a parent configuration - :param 
cfg_c: a parent configuration - :param a: weight for cfg_a - :param b: weight for cfg_b - :param c: weight for cfg_c - """ - # attempt to normalize order, we prefer a==1.0 - if a != 1.0 and b == 1.0: # swap a and b - a, cfg_a, b, cfg_b = b, cfg_b, a, cfg_a - if a != 1.0 and c == 1.0: # swap a and c - a, cfg_a, c, cfg_c = c, cfg_c, a, cfg_a - - # attempt to normalize order, we prefer b==-c - if b < c: # swap b and c - b, cfg_b, c, cfg_c = c, cfg_c, b, cfg_b - if b != -c and a == -c: # swap a and c - a, cfg_a, c, cfg_c = c, cfg_c, a, cfg_a - - if a == 1.0 and b == -c: - self.copy_value(cfg_a, cfg) - self.add_difference(cfg, b, cfg_b, cfg_c) # TODO inline this logic? - else: - # TODO: should handle more cases - self.op1_randomize(cfg) - - def add_difference(self, cfg_dst, scale, cfg_b, cfg_c): - """ - add the difference cfg_b-cfg_c to cfg_dst - - this is the key operation used in differential evolution - and some simplex techniques - - this operation is not possible in general with complex parameters but - we make an attempt to "fake" it - """ - if not self.same_value(cfg_b, cfg_c): - self.op1_randomize(cfg_dst) - - @abc.abstractmethod - def op1_randomize(self, config): - """ - randomize this value without taking into account the current position - :param config: the configuration to be changed - """ - pass - - @abc.abstractmethod - def seed_value(self): - """some legal value of this parameter (for creating initial configs)""" - return - - -class BooleanParameter(ComplexParameter): - def manipulators(self, config): - return [self.op1_flip] - - def get_value(self, config): - return self._get(config) - - def set_value(self, config, value): - self._set(config, value) - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration randomly - - :param config: the configuration to be changed - """ - self._set(config, self.seed_value()) - - def seed_value(self): - return random.choice((True, False)) - - def op1_flip(self, config): - """ - Flip this 
parameter's value in a configuration - - :param config: the configuration to be changed - """ - self._set(config, not self._get(config)) - - def search_space_size(self): - return 2 - - def op3_swarm(self, cfg, cfg1, cfg2, c=1, c1=0.5, - c2=0.5, velocity=0, *args, **kwargs): - """ - Simulates a single update step in particle swarm optimization by updating - the current position and returning a new velocity. - - The new velocity is given by - - .. math:: c*velocity + r1*c1*(cfg1-cfg) + r2*c2*(cfg2-cfg) - - where r1 and r2 are random values between 0 and 1 - - The new current position is randomly chosen based on the new velocity - - :param cfg: the configuration to be changed. Represents the current position - :param cfg1: a configuration to shift towards. Should be the local best position - :param cfg2: a configuration to shift towards. Should be the global best position - :param c: the weight of the current velocity - :param c1: weight of cfg1 - :param c2: weight of cfg2 - :param velocity: the old velocity - :param args: - :param kwargs: - :return: the new velocity, a float - - """ - v = velocity * c + (self.get_value(cfg1) - self.get_value( - cfg)) * c1 * random.random() + (self.get_value( - cfg2) - self.get_value(cfg)) * c2 * random.random() - # Map velocity to continuous space with sigmoid - s = 1 / (1 + numpy.exp(-v)) - # Decide position randomly - p = (s - random.random()) > 0 - self.set_value(cfg, p) - return v - - -class SwitchParameter(ComplexParameter): - """ - A parameter representing an unordered collection of options with no implied - correlation between the choices. 
The choices are range(option_count) - """ - - def __init__(self, name, option_count): - self.option_count = option_count - super(SwitchParameter, self).__init__(name) - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration to a random value - - :param config: the configuration to be changed - """ - self._set(config, random.randrange(self.option_count)) - - def seed_value(self): - return random.randrange(self.option_count) - - def search_space_size(self): - return max(1, self.option_count) - - -class EnumParameter(ComplexParameter): - """ - same as a SwitchParameter but choices are taken from an arbitrarily typed list - """ - - def __init__(self, name, options): - super(EnumParameter, self).__init__(name) - self.options = list(options) - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration to a random value - - :param config: the configuration to be changed - """ - self._set(config, random.choice(self.options)) - - def seed_value(self): - return random.choice(self.options) - - def search_space_size(self): - return max(1, len(self.options)) - - -class PermutationParameter(ComplexParameter): - """ - A parameter representing a permutation (or ordering) as a list of items - """ - def __init__(self, name, items): - super(PermutationParameter, self).__init__(name) - self._items = list(items) - self.size = len(items) - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration to a random value - - :param config: the configuration to be changed - """ - random.shuffle(self._get(config)) - self.normalize(config) - - def op1_small_random_change(self, config, p=0.25): - """ - Iterates through the list and probabilistically swaps each element with the - next element - - :param p: probability of swapping an element with the next element - :param config: the configuration to be changed - """ - cfg_item = self._get(config) - for i in xrange(1, len(cfg_item)): - if random.random() < 
p: - # swap - cfg_item[i - 1], cfg_item[i] = cfg_item[i], cfg_item[i - 1] - self.normalize(config) - - def seed_value(self): - return list(self._items) # copy - - def manipulators(self, config): - return [self.op1_randomize, self.op1_small_random_change] - - def get_value(self, config): - return self._get(config) - - def set_value(self, config, value): - self._set(config, value) - - def search_space_size(self): - return math.factorial(max(1, len(self._items))) - - def op3_cross(self, cfg, cfg1, cfg2, xchoice='op3_cross_OX1', strength=0.3, - *args, **kwargs): - """ - Calls the crossover operator specified by xchoice - Passes argument d = strength*(size of the permutation) - - :param cfg: the configuration to be changed - :param cfg1: a parent configuration - :param cfg2: a parent configuration - :param xchoice: string specifying which crossover operator to use (should start with op3_cross prefix) - :param strength: the strength of the crossover - """ - dd = int(round(self.size * strength)) - if dd < 1: - log.warning('Crossover length too small. Cannot create new solution.') - if dd >= self.size: - log.warning('Crossover length too big. Cannot create new solution.') - getattr(self, xchoice)(cfg, cfg1, cfg2, d=dd, *args, **kwargs) - - def op3_swarm(self, cfg, cfg1, cfg2, xchoice='op3_cross_OX1', c=0.5, - c1=0.5, c2=0.5, strength=0.3, velocity=0, *args, **kwargs): - """ - Replacement for particle swarm optimization iterative step for permutations. - Given a target cfg and 2 parent cfgs, probabilistically performs an - op3_cross with one of the 2 parents. - - :param cfg: the configuration to be changed. Represents the current position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. 
Should be the global best - position - :param xchoice: which crossover operator should be used - :param c: the probability of not performing a crossover - :param c1: the probability of performing a crossover with cfg1 (if a - crossover is performed) - :param c2: unused - :param strength: the strength of the crossover - :param velocity: the old velocity - unused - """ - if random.uniform(0, 1) > c: - if random.uniform(0, 1) < c1: - # Select crossover operator - self.op3_cross(cfg, cfg, cfg1, xchoice, strength) - else: - self.op3_cross(cfg, cfg, cfg2, xchoice, strength) - - # swap-based operators - def op2_random_swap(self, cfg, cfg1, *args, **kwargs): - """ - Swap a random pair of items in cfg1 and save the result into cfg - - :param cfg: the configuration to be changed - :param cfg1: the configuration whose PermutationParameter's elements are - swapped and copied into cfg - """ - p = self.get_value(cfg1)[:] - r = random.randint(0, len(p) - 1) - s = random.randint(0, len(p) - 1) - v1 = p[r] - v2 = p[s] - p[r] = v2 - p[s] = v1 - self.set_value(cfg, p) - - def op2_random_invert(self, cfg, cfg1, strength=0.3, *args, **kwargs): - """ - Reverse the ordering of a random subsection of size d in cfg1 and save the - result in cfg where d = strength*total-size - - :param cfg: the configuration to be changed - :param cfg1: the configuration whose PermutationParameter is inverted - :param strength: the size of the reversed subsection as a fraction of the - total size - """ - p = self.get_value(cfg1)[:] - d = int(round(len(p) * strength)) - r = random.randint(0, len(p) - d) - subpath = p[r:r + d][:] - subpath.reverse() - p[r:r + d] = subpath - self.set_value(cfg, p) - - # Crossover operators - def op3_cross_PX(self, cfg, cfg1, cfg2, d=0): - """ - Partition crossover (Whitley 2009?) - - Chooses a random cut point and reorders elements in cfg1 up to the cut point - according to their order in cfg2. 
- - Saves the result in cfg - - :param cfg: the configuration to be changed - :param cfg1: the first parent configuration. The "base" configuration - :param cfg2: the second parent configuration. Is "crossed into" cfg1 - :param d: unused - """ - p1 = self.get_value(cfg1) - p2 = self.get_value(cfg2) - c1 = random.randint(2, len(p1)) - self.set_value(cfg, sorted(p1[:c1], key=lambda x: p2.index(x)) + p1[c1:]) - - def op3_cross_PMX(self, cfg, cfg1, cfg2, d=0): - """ - Partially-mapped crossover Goldberg & Lingle (1985) - - Replaces a random section of cfg1 with the corresponding section in cfg2. - Displaced elements in cfg1 are moved to the old position of the elements - displacing them - - :param cfg: the configuration to be changed - :param cfg1: the first parent configuration. The "base" configuration - :param cfg2: the second parent configuration. Is "crossed into" cfg1 - :param d: the size of the crossover - """ - if d == 0: - d = max(1, int(round(self.size * 0.3))) # default to 1/3 of permutation size - p1 = self.get_value(cfg1)[:] - p2 = self.get_value(cfg2)[:] - - r = random.randint(0, len(p1) - d) - - c1 = p1[r:r + d] - c2 = p2[r:r + d] - - # get new permutation by crossing over a section of p2 onto p1 - pnew = self.get_value(cfg1)[:] - pnew[r:r + d] = c2 - # fix conflicts by taking displaced elements in crossed over section - # displaced = (elements x in c1 where x does not have corresponding value in c2) - # and putting them where the value that displaced them was - - #candidates for displacement - candidate_indices = set(range(r) + range(r+d, len(p1))) - # Check through displaced elements to find values to swap conflicts to - while c1 != []: - n = c1[0] - #try to match up a value in c1 to the equivalent value in c2 - while c2[0] in c1: - if n == c2[0]: - # already match up - break - # find position idx of c2[0] in c1 - link_idx = c1.index(c2[0]) - # get value of c2 at idx - link = c2[link_idx] - # remove c2[idx] and c1[idx] since they match up when we swap 
c2[0] with c2[idx] (this avoids an infinite loop) - del c2[link_idx] - del c1[link_idx] - # swap new value into c2[0] - c2[0] = link - - if n != c2[0]: - # first check if we can swap in the crossed over section still - if n in c2: - c2[c2.index(n)] = c2[0] - else: - # assign first instance of c2[0] outside of the crossed over section in pnew to c1[0] - for idx in candidate_indices: - if pnew[idx] == c2[0]: - pnew[idx] = c1[0] - candidate_indices.remove(idx) # make sure we don't override this value now - break - # remove first elements - del c1[0] - del c2[0] - self.set_value(cfg, pnew) - - def op3_cross_CX(self, cfg, cfg1, cfg2, d=0): - """ - Implementation of a cyclic crossover. - - Repeatedly replaces elements of cfg1 with the element at the same index in - cfg2. This is done until a cycle is reached and cfg1 is valid again. The - initial replacement is random. - - Saves the result in cfg. - - :param cfg: the configuration to be changed - :param cfg1: the first parent configuration. The "base" configuration - :param cfg2: the second parent configuration. Is "crossed into" cfg1 - :param d: unused - """ - p1 = self.get_value(cfg1) - p2 = self.get_value(cfg2) - p = p1[:] - - s = random.randint(0, len(p1) - 1) - i = s - indices = set() - - while len(indices) < len(p1): # should never exceed this - indices.add(i) - val = p1[i] - i = p2.index(val) - # deal with duplicate values - while i in indices: - if i == s: - break - i = p2[i+1:].index(val) + i + 1 - if i == s: - break - - for j in indices: - p[j] = p2[j] - - self.set_value(cfg, p) - - def op3_cross_OX1(self, cfg, cfg1, cfg2, d=0): - """ - Ordered Crossover (Davis 1985) - - Exchanges a subpath from cfg2 into cfg1 while maintaining the order of the - remaining elements in cfg1. - - Saves the result in cfg. - - :param cfg: the configuration to be changed - :param cfg1: the first parent configuration. The "base" configuration - :param cfg2: the second parent configuration. 
Is "crossed into" cfg1 - :param d: size of the exchanged subpath - """ - if d == 0: - d = max(1, int(round(self.size * 0.3))) # default to 1/3 of permutation size - p1 = self.get_value(cfg1) - p2 = self.get_value(cfg2) - c1 = p1[:] - c2 = p2[:] - # Randomly find cut points - r = random.randint(0, len( - p1) - d) # Todo: treat path as circle i.e. allow cross-boundary cuts - [c1.remove(i) for i in p2[r:int(r + d)]] - self.set_value(cfg, c1[:r] + p2[r:r + d] + c1[r:]) - - def op3_cross_OX3(self, cfg, cfg1, cfg2, d=0): - """ - Ordered crossover variation 3 (Deep 2010) - - Same as op3_cross_OX1, except the parents have different cut points for - their subpaths - - :param cfg: the configuration to be changed - :param cfg1: the first parent configuration. The "base" configuration - :param cfg2: the second parent configuration. Is "crossed into" cfg1 - :param d: size of the exchanged subpath - """ - if d == 0: - d = max(1, int(round(self.size * 0.3))) # default to 1/3 of permutation size - p1 = self.get_value(cfg1) - p2 = self.get_value(cfg2) - c1 = p1[:] - c2 = p2[:] - # Randomly find cut points - # Todo: treat path as circle i.e. 
allow cross-boundary cuts - r1 = random.randint(0, len(p1) - d) - r2 = random.randint(0, len(p1) - d) - [c1.remove(i) for i in p2[r2:r2 + d]] - self.set_value(cfg, c1[:r1] + p2[r2:r2 + d] + c1[r1:]) - - def search_space_size(self): - return math.factorial(max(1, len(self._items))) - - -class ScheduleParameter(PermutationParameter): - def __init__(self, name, items, deps): - super(ScheduleParameter, self).__init__(name, items) - self.deps = dict((k, set(v)) for k, v in deps.items()) - log.debug("ScheduleParameter(%s, %s, %s)", repr(name), repr(items), - repr(deps)) - self._expand_deps() - - def _expand_deps(self): - """expand self.deps to include recursive dependencies""" - fixed_point = False - while not fixed_point: - fixed_point = True - for k in self.deps.keys(): - oldlen = len(self.deps[k]) - for dep in list(self.deps[k]): - if dep in self.deps: - self.deps[k].update(self.deps[dep]) - if oldlen != len(self.deps[k]): - fixed_point = False - - # verify schedule is valid - items = set(self._items) - for k, v in self.deps.items(): - if k in v: - raise Exception("ScheduleParameter('%s') cycle: %s depends on itself" % - (self.name, k)) - - if v - items: - raise Exception("ScheduleParameter('%s'): %s is unknown" % - (self.name, v - items)) - - if set(self.deps.keys()) - items: - raise Exception("ScheduleParameter('%s'): %s is unknown" % - (self.name, set(self.deps.keys()) - items)) - - def is_topologically_sorted(self, values): - used = set() - for v in values: - if v in self.deps and self.deps[v].union(used): - return False - used.add(v) - return True - - def topologically_sorted_depth_first(self, values): - """faster but not stable enough""" - if self.is_topologically_sorted(values): - return values - sorted_values = [] - used = set() - deps = dict((k, sorted(v, key=values.index, reverse=True)) - for k, v in self.deps.items()) - - def visit(v): - if v in used: - return - if v in deps: - for dv in deps[v]: - visit(dv) - used.add(v) - sorted_values.append(v) - - for v 
in reversed(values): - visit(v) - return list(reversed(sorted_values)) - - def topologically_sorted(self, values): - if self.is_topologically_sorted(values): - return values - deps = copy.deepcopy(self.deps) - queue = collections.deque(reversed(values)) - sorted_values = [] - while queue: - v = queue.popleft() - if v in deps and deps[v]: - queue.append(v) - else: - for k, d in deps.items(): - d.discard(v) - if not d: - del deps[k] - sorted_values.append(v) - - return list(reversed(sorted_values)) - - def normalize(self, cfg): - self._set(cfg, self.topologically_sorted(self._get(cfg))) - - -class SelectorParameter(ComplexParameter): - def __init__(self, name, choices, max_cutoff, - order_class=PermutationParameter, - offset_class=LogIntegerParameter): - super(SelectorParameter, self).__init__(name) - self.choices = choices - self.max_cutoff = max_cutoff - self.order_param = order_class('{0}/order'.format(name), choices) - self.offset_params = [ - offset_class('{0}/offsets/{1}'.format(name, i), 0, max_cutoff) - for i in xrange(len(choices) - 1)] - - def sub_parameters(self): - return [self.order_param] + self.offset_params - - def seed_value(self): - return {'order': self.order_param.seed_value(), - 'offsets': [co.seed_value() for co in self.offset_params]} - - def op1_randomize(self, config): - random.choice(self.sub_parameters()).op1_randomize(config) - - def selector_iter(self, config): - """ - yield (cutoff, choice) pairs - cutoff will be None on the first value - """ - order = config[self.name]['order'] - yield (None, order[0]) - cutoff = 0 - for n, offset in enumerate(config[self.name]['offsets']): - if offset > 0: - cutoff += offset - yield cutoff, order[n + 1] - - -class ParameterArray(ComplexParameter): - """ - Represents an array of Parameters - """ - def __init__(self, name, count, element_type, *args, **kwargs): - super(ParameterArray, self).__init__(name) - self.count = count - - self.sub_params = [ - element_type('{0}/{1}'.format(name, i), *args[i], 
**kwargs[i]) - for i in xrange(count)] - - def sub_parameters(self): - return self.sub_params - - def seed_value(self): - return [p.seed_value() for p in self.sub_params] - - def op1_randomize(self, config): - """ - randomly selects a sub-parameter and randomizes it - - :param config: the configuration to be changed - """ - random.choice(self.sub_parameters()).op1_randomize(config) - - -class BooleanParameterArray(ParameterArray): - """ - Represents an array of BooleanParameters - currently unimplimented - """ - def __init__(self, name, count): - super(BooleanParameterArray, self).__init__(name, count, BooleanParameter) - - def op3_swarm(self, cfg, cfg1, cfg2, *args, **kwargs): - # TODO - pass - - def op3_cross(self, cfg, cfg1, cfg2, *args, **kwargs): - # TODO - pass - - -class IntegerParameterArray(ParameterArray): - """ - Represents an array of IntegerParameters - currently unimplemented - """ - def __init__(self, name, min_values, max_values): - assert len(min_values) == len(max_values) - super(IntegerParameterArray, self).__init__(name, len(min_values), - IntegerParameter, - min_value=min_values, - max_value=max_values) - - def op3_swarm(self, cfg, cfg1, cfg2, *args, **kwargs): - # TODO - pass - - def op3_cross(self, cfg, cfg1, cfg2, *args, **kwargs): - # TODO - pass - - -class Array(ComplexParameter): - """ - An interface for parameters representing an array of values. - """ - # TODO: constraints? (upper & lower bound etc) - def __init__(self, name, size): - super(Array, self).__init__(name) - self.size = size - - def op3_cross(self, cfg, cfg1, cfg2, strength=0.3, *args, **kwargs): - """ - Crosses two arrays by replacing a random subsection of cfg1 with the - corresponding subsection of cfg2.The size of the chunk is a fixed fraction - of the total length, given by the strength - - Behaves like a specialized 2-point crossover, where the first cut point is - random and the second cut is a set distance after. 
- - :param cfg: the configuration to be changed - :param cfg1: the configuration being inserted into - :param cfg2: the configuration being inserted - :param strength: the size of the crossover, as a fraction of total array - length - """ - d = int(round(self.size * strength)) - if d < 1: - log.debug('Crossover length too small. Cannot create new solution.') - if d >= self.size: - log.debug('Crossover length too big. Cannot create new solution.') - p1 = self.get_value(cfg1) - p2 = self.get_value(cfg2) - r = random.randint(0, len( - p1) - d) # Todo: treat path as circle i.e. allow cross-boundary cuts - p = numpy.concatenate([p1[:r], p2[r:r + d], p1[r + d:]]) - self.set_value(cfg, p) - - def op3_swarm(self, cfg, cfg1, cfg2, c=1, c1=0.5, - c2=0.5, velocity=0, strength=0.3, *args, **kwargs): - """ - Replacement for a particle swarm optimization iterative step for arrays. - Given a target cfg and 2 parent cfgs, probabilistically performs an - :py:meth:`op3_cross` with one of the 2 parents. - - :param cfg: the configuration to be changed. Represents the cfg position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. 
Should be the global best - position - :param c: the probability of not performing a crossover - :param c1: the probability of performing a crossover with cfg1 (if a - crossover is performed) - :param c2: unused - :param velocity: the old velocity - unused - :param strength: the strength of the crossover - """ - if random.uniform(0, 1) > c: - if random.uniform(0, 1) < c1: - # Select crossover operator - self.op3_cross(cfg, cfg, cfg1, strength) - else: - self.op3_cross(cfg, cfg, cfg2, strength) - - def get_value(self, config): - return self._get(config) - - def set_value(self, config, value): - self._set(config, value) - - -class BooleanArray(Array): - """ - Represents an array of boolean values which are either 0 or 1 - """ - def op3_swarm_parallel(self, cfg, cfg1, cfg2, c=1, - c1=0.5, c2=0.5, velocities=0): - """ - Simulates a single particle swarm optimization step for each element in the - array by updating each position and returning an array of new velocities. - - The new velocities are given by - - .. math:: c*velocity + r1*c1*(cfg1-cfg) + r2*c2*(cfg2-cfg) - - where r1 and r2 are random values between 0 and 1. In each iteration, r1 and - r2 are constant across array elements - - The new cfg positions are randomly chosen based on the new velocities - - :param cfg: the configuration to be changed. This represents the current - position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. 
Should be the global best - position - :param c: the weight of the current velocities - :param c1: weight of cfg1 - :param c2: weight of cfg2 - :param velocities: the current velocities - :return: a numpy array of new velocities - """ - vs = velocities * c + (self.get_value(cfg1) - self.get_value( - cfg)) * c1 * random.random() + (self.get_value( - cfg2) - self.get_value(cfg)) * c2 * random.random() - # Map velocity to continuous space with sigmoid - ss = 1 / (1 + numpy.exp(-vs)) - # Decide position randomly - ps = (ss - numpy.random.rand(1, self.size)) > 0 - self.set_value(cfg, ps) - return vs - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration randomly - - :param config: the configuration to be changed - """ - value = numpy.random.rand(1, self.size) > 0.5 - self._set(config, value) - - def seed_value(self): - return numpy.random.rand(1, self.size) > 0.5 - - -class FloatArray(Array): - """ - Represents an array of float values - """ - def __init__(self, name, size, fmax, fmin): - super(FloatArray, self).__init__(name, size) - self.fmax = fmax - self.fmin = fmin - - def op1_randomize(self, config): - """ - Set this parameter's value in a configuration randomly - - :param config: the configuration to be changed - """ - value = numpy.random.rand(1, self.size) * ( - self.fmax - self.fmin) + self.fmin - self._set(config, value) - - def seed_value(self): - value = numpy.random.rand(1, self.size) * ( - self.fmax - self.fmin) + self.fmin - return value - - def op3_swarm_parallel(self, cfg, cfg1, cfg2, c=1, - c1=0.5, c2=0.5, velocities=0): - """ - Simulates a single particle swarm optimization step for each element in the - array by updating the each position and returning an array of new velocities - - The new velocity is given by - - .. math:: c*velocity + r1*c1*(cfg1-cfg) + r2*c2*(cfg2-cfg) - - where r1 and r2 are random values between 0 and 1. 
In each iteration, r1 and - r2 are constant across array elements - - The new cfg positions are randomly chosen based on the new velocities - - :param cfg: the configuration to be changed. This represents the current - position - :param cfg1: a configuration to shift towards. Should be the local best - position - :param cfg2: a configuration to shift towards. Should be the global best - position - :param c: the weight of the cfg velocities - :param c1: weight of cfg1 - :param c2: weight of cfg2 - :param velocities: the cfg velocities - :return: a numpy array of new velocities - """ - vs = velocities * c + (self.get_value(cfg1) - self.get_value( - cfg)) * c1 * random.random() + (self.get_value( - cfg2) - self.get_value(cfg)) * c2 * random.random() - p = self.get_value(cfg) + vs - p[p > self.fmax] = self.fmax - p[p < self.fmin] = self.fmin - self.set_value(cfg, p) - return vs - - -################## - -class ManipulatorProxy(object): - """ - wrapper around configuration manipulator and config pair - """ - - def __init__(self, manipulator, cfg): - self.cfg = cfg - self.manipulator = manipulator - self.params = manipulator.parameters_dict(self.cfg) - - def keys(self): - return self.params.keys() - - def __getitem__(self, k): - return ParameterProxy(self.params[k], self.cfg) - - -class ParameterProxy(object): - """ - wrapper aint parameter and config pair, adds config - as first argument to all method calls to parameter - """ - - def __init__(self, param, cfg): - self.cfg = cfg - self.param = param - - def __getattr__(self, key): - """equivalent of self.param.key(self.cfg, ...)""" - member = getattr(self.param, key) - - def param_method_proxy(*args, **kwargs): - return member(self.cfg, *args, **kwargs) - - if callable(member): - return param_method_proxy - else: - # we should only hit this for key == 'name' - return member - - -# Inspection Methods -def operators(param, num_parents): - """ - Return a list of operators for the given parameter that take the specified - 
number of input configurations - - :param param: a Parameter class - :param num_parents: a String specifying number of inputs required by the operator. - should be one of '1', '2', '3', '4', or 'n' - """ - ops = [] - methods = inspect.getmembers(param, inspect.ismethod) - for m in methods: - name, obj = m - if is_operator(name, num_parents): - ops.append(name) - return ops - -def composable_operators(param, min_num_parents): - """ - Return a list of operators for the given parameter that can be programatically composed - with a composable technique generating min_num_parents. - - Programatically composable operators have no non-cfg arguments - - :param param: a Parameter class - :param min_num_parents: the minimum number of parents passed to the operator - """ - if min_num_parents < 1: - return [] - - allowed_num_parents = ['n'] - for i in range(1,5): - if i > min_num_parents: - break - allowed_num_parents.append(str(i)) - - ops = [] - methods = inspect.getmembers(param, inspect.ismethod) - for m in methods: - name, obj = m - argspec = inspect.getargspec(obj) - numargs = len(argspec.args) - (len(argspec.defaults) if argspec.defaults else 0) - for num_parents in allowed_num_parents: - if is_operator(name, num_parents): - if num_parents == 'n': - if numargs == 3: # self, cfg, cfgs - ops.append(name) - else: - if numargs == (1 + int(num_parents)): - ops.append(name) - break - return ops - - -def is_operator(name, num_parents): - """ - Tells whether a method is an operator taking in the specified number of inputs - from the method name - - :param name: the method name - :param num_parents: a String specifying number of inputs required by the operator. 
- should be one of '1', '2', '3', '4', or 'n' - """ - return ('op' + num_parents + '_') == name[:4] - -def all_operators(): - """ - Return a dictionary mapping from parameter names to lists of operator function - names - """ - ops = {} - for p in all_params(): - name, obj = p - all_ops = [] - for num in ['1', '2', '3', '4', 'n']: - all_ops += operators(obj, num) - ops[name] = all_ops - return ops - -def all_params(): - params = inspect.getmembers(sys.modules[__name__], lambda x: inspect.isclass( - x) and x.__module__ == __name__ and issubclass(x, Parameter)) - return params - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/metatechniques.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/metatechniques.py deleted file mode 100644 index 2e33e7961ab2d7f9b16ea48cb680dd751af32d7a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/metatechniques.py +++ /dev/null @@ -1,186 +0,0 @@ -import abc -import logging -from collections import deque, defaultdict -from fn import _ - -from .technique import SearchTechniqueBase - -log = logging.getLogger(__name__) - -class MetaSearchTechnique(SearchTechniqueBase): - """ - a technique made up of a collection of other techniques - """ - def __init__(self, techniques, log_freq = 500, *pargs, **kwargs): - super(MetaSearchTechnique, self).__init__(*pargs, **kwargs) - self.techniques = techniques - self.request_count = 0 - self.log_freq = log_freq - self.logging_use_counters = defaultdict(int) - self.unique_names() - - def unique_names(self): - names = set() - for t in self.techniques: - while t.name in names: - t.name += '~' - t.name = intern(t.name) - names.add(t.name) - - def set_driver(self, driver): - super(MetaSearchTechnique, self).set_driver(driver) - for t in self.techniques: - t.set_driver(driver) - self.driver = driver - - def desired_result(self): - techniques = self.select_technique_order() - for technique in 
techniques: - dr = technique.desired_result() - if dr is not None: - if dr is False: - # technique is waiting for results - continue - self.driver.register_result_callback(dr, - lambda result: self.on_technique_result(technique, result)) - if self.log_freq: - self.logging_use_counters[technique.name] += 1 - self.debug_log() - self.request_count += 1 - return dr - else: - self.on_technique_no_desired_result(technique) - return None - - def on_technique_no_desired_result(self, technique): - """called if a sub-technique returns None""" - pass - - def on_technique_result(self, technique, result): - """callback for results of sub-techniques""" - pass - - @abc.abstractmethod - def select_technique_order(self): - """select the order of next techniques to try""" - return [] - - def debug_log(self): - if self.log_freq and sum(self.logging_use_counters.values())>self.log_freq: - log.info("%s: %s", self.name, - str(sorted(self.logging_use_counters.items(), key = _[1]*-1))) - self.logging_use_counters = defaultdict(int) - -class RoundRobinMetaSearchTechnique(MetaSearchTechnique): - """evenly switch between all source techniques""" - def __init__(self, techniques, **kwargs): - techniques = deque(techniques) - super(RoundRobinMetaSearchTechnique, self).__init__(techniques, **kwargs) - - def select_technique_order(self): - rv = list(self.techniques) - self.techniques.rotate(1) - return rv - -class RecyclingMetaTechnique(MetaSearchTechnique): - """ - periodically restart techniques that are not performing well compared to - global best - """ - def __init__(self, - techniques_generators, - window = 100, - factor = 5.0, - **kwargs): - if 'log_freq' not in kwargs: - kwargs['log_freq'] = None - techniques = deque((g(seed_cfg = None) for g in techniques_generators)) - self.rename_i = 0 - for t in techniques: - self.rename_technique(t) - super(RecyclingMetaTechnique, self).__init__(techniques, **kwargs) - self.best_results = defaultdict(lambda: None) - self.factor = factor - 
self.last_check = 0 - self.old_best_results = defaultdict(lambda: None) - self.technique_generators = deque(techniques_generators) - self.window = window - - def rename_technique(self, technique): - technique.name += ".R%d" % self.rename_i - self.rename_i += 1 - - def on_technique_result(self, technique, result): - """callback for results of sub-techniques""" - if (self.best_results[technique] is None or - self.driver.objective.lt(result, self.best_results[technique])): - self.best_results[technique] = result - - def technique_cmp(self, a, b): - # a1 = self.old_best_results[a] - # a2 = self.best_results[a] - # b1 = self.old_best_results[b] - # b2 = self.best_results[b] - # if a1 is None and b1 is None: - # return 0 - # if a1 is None: - # return -1 - # if b1 is None: - # return 1 - # return self.driver.objective.project_compare(a1, a2, b1, b2, self.factor) - - # not ready techniques go to the back - if not a.is_ready() or not b.is_ready(): - return cmp(b.is_ready(), a.is_ready()) - - a = self.best_results[a] - b = self.best_results[b] - if a is None and b is None: - return 0 - if a is None: - return -1 - if b is None: - return 1 - return self.driver.objective.compare(a, b) - - def recycle_techniques(self): - techniques = list(self.techniques) - techniques.sort(cmp=self.technique_cmp) - worst = techniques[-1] - - if (not worst.is_ready() - or (self.old_best_results[worst] is not None - and self.driver.objective.lt(self.driver.best_result, - self.best_results[worst]))): - techniques_new = deque() - tn = None - for t, gen in zip(self.techniques, self.technique_generators): - if t is worst: - tn = gen(seed_cfg=self.driver.best_result.configuration.data) - self.rename_technique(tn) - tn.set_driver(self.driver) - log.info("%s replacing %s with %s", self.name, t.name, tn.name) - techniques_new.append(tn) - else: - techniques_new.append(t) - self.techniques = techniques_new - else: - log.debug("%s: not replacing techniques", self.name) - - self.old_best_results = 
self.best_results - self.best_results = defaultdict(lambda: None) - for t in self.techniques: - self.best_results[t] = self.old_best_results[t] - - def select_technique_order(self): - """ - round robin between techniques - """ - if self.last_check + self.window < self.request_count: - self.last_check = self.request_count - self.recycle_techniques() - rv = list(self.techniques) - self.techniques.rotate(1) - self.technique_generators.rotate(1) - return rv - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/objective.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/objective.py deleted file mode 100644 index b46a2f54b2f0922f774548c1c2d009ffa581512e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/objective.py +++ /dev/null @@ -1,338 +0,0 @@ -import abc -import logging - -from fn import _ - -import opentuner -from opentuner.resultsdb.models import * - -log = logging.getLogger(__name__) - - -class SearchObjective(object): - """ - delegates the comparison of results and configurations - """ - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [] - - @abc.abstractmethod - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return - - def config_compare(self, config1, config2): - """cmp() compatible comparison of resultsdb.models.Configuration""" - return self.result_compare(self.driver.results_query(config=config1).one(), - self.driver.results_query(config=config2).one()) - - @abc.abstractmethod - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - return - - def config_relative(self, config1, config2): - """return None, or a relative goodness of resultsdb.models.Configuration""" - return 
self.result_relative(self.driver.results_query(config=config1).one(), - self.driver.results_query(config=config2).one()) - - - def __init__(self): - self.driver = None - - def set_driver(self, driver): - self.driver = driver - - def result_order_by(self, q): - return q.order_by(*self.result_order_by_terms()) - - def compare(self, a, b): - """cmp() compatible compare""" - if isinstance(a, Configuration): - return self.config_compare(a, b) - if isinstance(a, Result): - return self.result_compare(a, b) - assert False - - def relative(self, a, b): - if isinstance(a, Configuration): - return self.config_relative(a, b) - if isinstance(a, Result): - return self.result_relative(a, b) - assert None - - def lt(self, a, b): - return self.compare(a, b) < 0 - - def lte(self, a, b): - return self.compare(a, b) <= 0 - - def gt(self, a, b): - return self.compare(a, b) > 0 - - def gte(self, a, b): - return self.compare(a, b) >= 0 - - def min(self, *l): - if len(l) == 1: - l = l[0] - rv = l[0] - for i in l[1:]: - if self.lt(i, rv): - rv = i - return rv - - def max(self, *l): - if len(l) == 1: - l = l[0] - rv = l[0] - for i in l[1:]: - if self.gt(i, rv): - rv = i - return rv - - def limit_from_config(self, config): - """ - a time limit to kill a result after such that it can be compared to config - """ - results = self.driver.results_query(config=config) - if results.count() == 0: - return None - else: - return max(map(_.time, self.driver.results_query(config=config))) - - - def project_compare(self, a1, a2, b1, b2, factor=1.0): - """ - linearly project both a and b forward to see how they will compare in the - future - """ - a3 = Result() - b3 = Result() - a3.time = _project(a1.time, a2.time, factor) - a3.accuracy = _project(a1.accuracy, a2.accuracy, factor) - a3.energy = _project(a1.energy, a2.energy, factor) - a3.confidence = _project(a1.confidence, a2.confidence, factor) - return self.result_compare(a3, b3) - - def display(self, result): - """ - produce a string version of a 
resultsdb.models.Result() - """ - rv = [] - for k in ('time', 'accuracy', 'energy', 'size', 'confidence'): - v = getattr(result, k) - if v is not None: - rv.append('%s=%.4f' % (k, float(v))) - return ', '.join(rv) - - def filter_acceptable(self, query): - """Return a Result() query that only returns acceptable results""" - return query - - def is_acceptable(self, result): - """Test if a Result() meets thresholds""" - return True - - def stats_quality_score(self, result, worst_result, best_result): - """return a score for statistics""" - if not self.is_acceptable(result): - return worst_result.time - else: - return result.time - - -def _project(a1, a2, factor): - if a1 is None or a2 is None: - return None - return a2 + factor * (a2 - a1) - - -class MinimizeTime(SearchObjective): - """ - minimize Result().time - """ - - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [Result.time] - - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return cmp(result1.time, result2.time) - - def config_compare(self, config1, config2): - """cmp() compatible comparison of resultsdb.models.Configuration""" - return cmp(min(map(_.time, self.driver.results_query(config=config1))), - min(map(_.time, self.driver.results_query(config=config2)))) - - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - if result2.time == 0: - return float('inf') * result1.time - return result1.time / result2.time - -class MinimizeSize(SearchObjective): - - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [Result.size] - - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return cmp(result1.size, result2.size) - - def result_relative(self, result1, result2): - """return None, or a relative goodness of 
resultsdb.models.Result""" - if result2.size == 0: - return float('inf') * result1.size - return result1.size / result2.size - - -class MinimizeSizeMinimizeTime(SearchObjective): - """ - minimize Result.size() and Result.time() - """ - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [Result.time, Result.size] - - - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return cmp((result1.time, result1.size),(result2.time,result2.size)) - - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - log.warning('result_relative() not yet implemented for %s', - self.__class__.__name__) - -class MaximizeAccuracy(SearchObjective): - """ - maximize Result().accuracy - """ - - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [-Result.accuracy] - - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - # note opposite order - return cmp(result2.accuracy, result1.accuracy) - - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - # note opposite order - if result1.accuracy == 0: - return float('inf') * result2.accuracy - return result2.accuracy / result1.accuracy - - def stats_quality_score(self, result, worst_result, best_result): - """return a score for statistics""" - if not self.is_acceptable(result): - return worst_result.time - else: - return result.time - - def stats_raw_score(self, result): - return result.accuracy - - -class MaximizeAccuracyMinimizeSize(MaximizeAccuracy): - """ - maximize Result().accuracy, break ties with Result().size - """ - - def result_order_by_terms(self): - """return database columns required to order by the objective""" - return [-Result.accuracy, Result.size] - - def result_compare(self, 
result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return cmp((-result1.accuracy, result1.size), - (-result2.accuracy, result2.size)) - - def display(self, result): - """ - produce a string version of a resultsdb.models.Result() - """ - return "accuracy=%.8f, size=%.1f" % (result.accuracy, result.size) - - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - # unimplemented for now - log.warning('result_relative() not yet implemented for %s', - self.__class__.__name__) - return None - - -class ThresholdAccuracyMinimizeTime(SearchObjective): - """ - if accuracy >= target: - minimize time - else: - maximize accuracy - """ - - def __init__(self, accuracy_target, low_accuracy_limit_multiplier=10.0): - self.accuracy_target = accuracy_target - self.low_accuracy_limit_multiplier = low_accuracy_limit_multiplier - super(ThresholdAccuracyMinimizeTime, self).__init__() - - def result_order_by_terms(self): - """return database columns required to order by the objective""" - - return ["min(accuracy, %f) desc" % self.accuracy_target, - opentuner.resultsdb.models.Result.time] - - def result_compare(self, result1, result2): - """cmp() compatible comparison of resultsdb.models.Result""" - return cmp((-min(self.accuracy_target, result1.accuracy), - result1.time), - (-min(self.accuracy_target, result2.accuracy), result2.time)) - - def config_compare(self, config1, config2): - """cmp() compatible comparison of resultsdb.models.Configuration""" - return self.result_compare( - self.driver.results_query(config=config1, objective_ordered=True)[0], - self.driver.results_query(config=config2, objective_ordered=True)[0]) - - def limit_from_config(self, config): - """ - a time limit to kill a result after such that it can be compared to config - """ - results = self.driver.results_query(config=config) - if results.count() == 0: - return None - if self.accuracy_target > min(map(_.accuracy, 
results)): - m = self.low_accuracy_limit_multiplier - else: - m = 1.0 - return m * max(map(_.time, results)) - - - def filter_acceptable(self, query): - """Return a Result() query that only returns acceptable results""" - return query.filter(opentuner.resultsdb.models.Result.accuracy - >= self.accuracy_target) - - def is_acceptable(self, result): - """Test if a Result() meets thresholds""" - return result.accuracy >= self.accuracy_target - - def result_relative(self, result1, result2): - """return None, or a relative goodness of resultsdb.models.Result""" - # unimplemented for now - log.warning('result_relative() not yet implemented for %s', - self.__class__.__name__) - return None - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/patternsearch.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/patternsearch.py deleted file mode 100644 index 7b526e7897f2c673552899ae3a115d6e2e06737b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/patternsearch.py +++ /dev/null @@ -1,72 +0,0 @@ - - -from opentuner.search import technique - -class PatternSearch(technique.SequentialSearchTechnique): - def main_generator(self): - - objective = self.objective - driver = self.driver - manipulator = self.manipulator - - # start at a random position - center = driver.get_configuration(manipulator.random()) - self.yield_nonblocking(center) - - # initial step size is arbitrary - step_size = 0.1 - - while True: - points = list() - for param in manipulator.parameters(center.data): - if param.is_primitive(): - # get current value of param, scaled to be in range [0.0, 1.0] - unit_value = param.get_unit_value(center.data) - - if unit_value > 0.0: - # produce new config with param set step_size lower - down_cfg = manipulator.copy(center.data) - param.set_unit_value(down_cfg, max(0.0, unit_value - step_size)) - down_cfg = driver.get_configuration(down_cfg) - 
self.yield_nonblocking(down_cfg) - points.append(down_cfg) - - if unit_value < 1.0: - # produce new config with param set step_size higher - up_cfg = manipulator.copy(center.data) - param.set_unit_value(up_cfg, min(1.0, unit_value + step_size)) - up_cfg = driver.get_configuration(up_cfg) - self.yield_nonblocking(up_cfg) - points.append(up_cfg) - - else: # ComplexParameter - for mutate_function in param.manipulators(center.data): - cfg = manipulator.copy(center.data) - mutate_function(cfg) - cfg = driver.get_configuration(cfg) - self.yield_nonblocking(cfg) - points.append(cfg) - - - yield None # wait for all results - - #sort points by quality, best point will be points[0], worst is points[-1] - points.sort(cmp=objective.compare) - - if (objective.lt(driver.best_result.configuration, center) - and driver.best_result.configuration != points[0]): - # another technique found a new global best, switch to that - center = driver.best_result.configuration - elif objective.lt(points[0], center): - # we found a better point, move there - center = points[0] - else: - # no better point, shrink the pattern - step_size /= 2.0 - -# register our new technique in global list -technique.register(PatternSearch()) - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/plugin.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/plugin.py deleted file mode 100644 index ad8481837cbee62ba8c3f1c94a27529261953bb0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/plugin.py +++ /dev/null @@ -1,152 +0,0 @@ -import abc -import argparse -import logging -import time - -from datetime import datetime -from fn import _ - -log = logging.getLogger(__name__) -display_log = logging.getLogger(__name__ + ".DisplayPlugin") - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--results-log', - help="file to store log of the best configuration times") 
-argparser.add_argument('--results-log-details', - help="file to store log of the non-best configuration times") -argparser.add_argument('--quiet', action='store_true', - help="print less information") -argparser.add_argument('--display-frequency', default=10, type=int, - help="how often for DisplayPlugin to print") - -class SearchPlugin(object): - @property - def priority(self): - """control order the plugin hooks gets run in, lower runs first""" - return 0 - - def set_driver(self, driver): - """called before all other methods""" - self.driver = driver - - def before_main(self): pass - def after_main(self): pass - - def before_techniques(self): pass - def after_techniques(self): pass - - def before_results_wait(self): pass - def after_results_wait(self): pass - - def on_result(self, result): - """ - called once for every new result - """ - pass - - def on_result_for_technique(self, result, technique): - """ - called right before a result is given to a technique - (result may be requested by multiple techniques) - """ - pass - - def on_new_best_result(self, result): - """ - called whenever the global best result changes - """ - pass - -class DisplayPlugin(SearchPlugin): - __metaclass__ = abc.ABCMeta - def __init__(self, display_period=5): - super(DisplayPlugin, self).__init__() - self.last = time.time() - self.start = time.time() - self.display_period = display_period - - def after_results_wait(self): - t = time.time() - if t - self.display_period > self.last: - # call display every 5 seconds - self.last = t - self.display(t) - - def after_main(self): - self.display() - - @abc.abstractmethod - def display(self, t=None): - pass - - -class LogDisplayPlugin(DisplayPlugin): - def display(self, t=None): - if not t: - t = time.time() - count = self.driver.results_query().count() - best = self.driver.results_query(objective_ordered = True).first() - if best is None: - log.warning("no results yet") - return - requestor = ','.join(map(_.requestor, best.desired_results)) - 
display_log.info("tests=%d, best %s, cost %s, found by %s", - count, - cfg_repr(best.configuration), - self.driver.objective.display(best), - requestor, - ) - -class FileDisplayPlugin(SearchPlugin): - def __init__(self, out, details, *args, **kwargs): - super(FileDisplayPlugin, self).__init__(*args, **kwargs) - self.last_best = float('inf') - self.start_date = datetime.now() - if out: - self.out = open(out, "w") - else: - self.out = None - if out == details: - self.details = self.out - self.out = None - elif details: - self.details = open(details, "w") - else: - self.details = None - - def on_result(self, result): - if self.out and result.time < self.last_best: - self.last_best = result.time - print >>self.out, \ - (result.collection_date - self.start_date).total_seconds(), \ - result.time - self.out.flush() - if self.details: - print >>self.details, \ - (result.collection_date - self.start_date).total_seconds(), \ - result.time - self.details.flush() - -def get_enabled(args): - plugins = [] - if not args.quiet: - plugins.append(LogDisplayPlugin(args.display_frequency)) - if args.results_log or args.results_log_details: - plugins.append(FileDisplayPlugin(args.results_log, - args.results_log_details)) - return plugins - -def cfg_repr(cfg): - try: - s = repr(cfg.data) - if len(s) < 100: - return s - except: - pass - return "#{0}".format(cfg.id) - - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/pso.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/pso.py deleted file mode 100644 index 3b8c37a7787b900a70f80ffab00d5c90b46c7541..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/pso.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab autoindent smarttab -from manipulator import * -from opentuner.search import technique -import random -import math - -class 
PSO(technique.SequentialSearchTechnique ): - """ Particle Swarm Optimization """ - def __init__(self, crossover, N = 30, init_pop=None, *pargs, **kwargs): - """ - crossover: name of crossover operator function - """ - super(PSO, self).__init__(*pargs, **kwargs) - self.crossover = crossover - self.name = 'pso-'+crossover.replace("op3_cross_","") - self.init_pop = init_pop - self.N = N - - def main_generator(self): - - objective = self.objective - driver = self.driver - m = self.manipulator - def config(cfg): - return driver.get_configuration(cfg) - - population = self.init_pop - if not population: - population = [HybridParticle(m, self.crossover) for i in range(self.N)] - - for p in population: - yield driver.get_configuration(p.position) - - while True: - for particle in population: - g = driver.best_result.configuration.data - old=m.copy(particle.position) - particle.move(g) - yield config(particle.position) - # update individual best - if objective.lt(config(particle.position), config(particle.best)): - particle.best = particle.position - -class HybridParticle(object): - def __init__(self, m, crossover_choice, omega=0.5, phi_l=0.5, phi_g=0.5): - - """ - m: a configuraiton manipulator - omega: influence of the particle's last velocity, a float in range [0,1] ; omega=1 means even speed - phi_l: influence of the particle's distance to its historial best position, a float in range [0,1] - phi_g: influence of the particle's distance to the global best position, a float in range [0,1] - """ - - self.manipulator = m - self.position = self.manipulator.random() - self.best = self.position - self.omega = omega - self.phi_l = phi_l - self.phi_g = phi_g - self.crossover_choice = crossover_choice - self.velocity = {} - for p in self.manipulator.params: - # Velocity as a continous value - self.velocity[p.name]=0 - - def move(self, global_best): - """ - Update parameter values using corresponding operators. 
- TODO: introduce operator choice map - """ - m = self.manipulator - for p in m.params: - self.velocity[p.name] = p.op3_swarm(self.position, global_best, self.best, c=self.omega, c1=self.phi_g, c2=self.phi_l, xchoice=self.crossover_choice, velocity=self.velocity[p.name]) - - -technique.register(PSO(crossover = 'op3_cross_OX3')) -technique.register(PSO(crossover = 'op3_cross_OX1')) -technique.register(PSO(crossover = 'op3_cross_PMX')) -technique.register(PSO(crossover = 'op3_cross_PX')) -technique.register(PSO(crossover = 'op3_cross_CX')) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simplextechniques.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simplextechniques.py deleted file mode 100644 index 3cfec0eebb25cf3c7ff2cc2bc69d558454660e32..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simplextechniques.py +++ /dev/null @@ -1,457 +0,0 @@ -import abc -import logging -import math -from collections import defaultdict -from fn import _ -from fn.iters import map, filter -from .manipulator import Parameter -from .metatechniques import RecyclingMetaTechnique -from .technique import SequentialSearchTechnique, register - -log = logging.getLogger(__name__) - - -class SimplexTechnique(SequentialSearchTechnique): - """ - Base class with utility functions common - to simplex type methods - """ - - def __init__(self, seed_cfg=None, *args, **kwargs): - super(SimplexTechnique, self).__init__(*args, **kwargs) - self.centroid = None - self.last_simplex_points = None - self.seed_cfg = seed_cfg - self.simplex_points = [] - - def calculate_centroid(self): - """ - average of all the PrimitiveParameters in self.simplex_points - ComplexParameters are copied from self.simplex_points[0] - """ - sums = defaultdict(float) - counts = defaultdict(int) - - for config in self.simplex_points: - cfg = config.data - for param in self.manipulator.parameters(cfg): - if 
param.is_primitive(): - sums[param.name] += param.get_unit_value(cfg) - counts[param.name] += 1 - - centroid = self.manipulator.copy(self.simplex_points[0].data) - for param in self.manipulator.parameters(centroid): - if param.is_primitive(): - param.set_unit_value(centroid, - sums[param.name] / float(counts[param.name])) - - return centroid - - def cfg_to_str(self, cfg): - params = list(filter(Parameter.is_primitive, - self.manipulator.parameters(cfg))) - params.sort(key=_.name) - return str(tuple(map(lambda x: x.get_unit_value(cfg), params))) - - def debug_log(self): - for i, config in enumerate(self.simplex_points): - log.debug("simplex_points[%d] = %s", i, self.cfg_to_str(config.data)) - if self.centroid: - log.debug("centroid = %s", self.cfg_to_str(self.centroid)) - - def linear_point(self, p1, p2, scale): - """ - return a point on the line passing between p1 and p2 at position scale - such that p1 + scale*(p1 - p2) - """ - return self.manipulator.linear_config(1.0, p1, scale, p1, -scale, p2) - - def convergence_criterea(self): - """True will cause the simplex method to stop""" - if self.rounds_since_novel_request > 3 * len(self.simplex_points) + 1: - return True - if self.last_simplex_points == self.simplex_points: - return True - self.last_simplex_points = list(self.simplex_points) - return False - - def initial_simplex_seed(self): - """ - return a point to base the initial simplex on - """ - if self.seed_cfg is not None: - return self.seed_cfg - return self.manipulator.random() - - @abc.abstractmethod - def initial_simplex(self): - """ - return a initial list of configurations - """ - return [] - - -class RandomInitialMixin(object): - """ - start with random initial simplex - """ - - def initial_simplex(self): - # we implicitly assume number of parameters is fixed here, however - # it will work if it isn't (simplex size is undefined) - cfg0 = self.initial_simplex_seed() - params = self.manipulator.parameters(cfg0) - return [cfg0] + 
[self.manipulator.random() - for p in params - if p.is_primitive()] - - -class RightInitialMixin(object): - """ - start with random initial right triangle like simplex - """ - - def __init__(self, initial_unit_edge_length=0.1, *args, **kwargs): - assert initial_unit_edge_length <= 0.5 - self.initial_unit_edge_length = initial_unit_edge_length - super(RightInitialMixin, self).__init__(*args, **kwargs) - - def initial_simplex(self): - cfg0 = self.initial_simplex_seed() - simplex = [cfg0] - params = self.manipulator.parameters(cfg0) - params = filter(lambda x: x.is_primitive(), params) - for p in params: - simplex.append(self.manipulator.copy(cfg0)) - v = p.get_unit_value(simplex[-1]) - if v <= 0.5: - v += self.initial_unit_edge_length - else: - v -= self.initial_unit_edge_length - p.set_unit_value(simplex[-1], v) - return simplex - - -class RegularInitialMixin(object): - """ - start with random initial regular simplex (all edges equal length) - """ - - def __init__(self, initial_unit_edge_length=0.1, *args, **kwargs): - assert initial_unit_edge_length <= 0.5 - self.initial_unit_edge_length = initial_unit_edge_length - super(RegularInitialMixin, self).__init__(*args, **kwargs) - - def initial_simplex(self): - cfg0 = self.initial_simplex_seed() - simplex = [cfg0] - params = self.manipulator.parameters(cfg0) - params = list(filter(lambda x: x.is_primitive(), params)) - if len(params) == 0: - return simplex - - q = (((math.sqrt(len(params) + 1.0) - 1.0) / (len(params) * math.sqrt(2.0))) - * self.initial_unit_edge_length) - p = q + ((1.0 / math.sqrt(2.0)) * self.initial_unit_edge_length) - - base = [x.get_unit_value(cfg0) for x in params] - for j in xrange(len(base)): - if max(p, q) + base[j] > 1.0: - #flip this dimension as we would overflow our [0,1] bounds - base[j] *= -1.0 - - for i in xrange(len(params)): - simplex.append(self.manipulator.copy(cfg0)) - params[i].set_unit_value(simplex[-1], abs(base[i] + p)) - for j in xrange(i + 1, len(params)): - 
params[j].set_unit_value(simplex[-1], abs(base[i] + q)) - - return simplex - - -class NelderMead(SimplexTechnique): - """ - Nelder-Mead downhill simplex method. - - Based on description of method on page 82 of - 'Noisy Optimization With Evolution Strategies' by Dirk V. Arnold. - - We set alpha=2.0 by default instead of the often recommended alpha=1.0 to - avoid a common degenerate case, where the volume of the simplex becomes zero. - This is easiest to see with a single parameter. Let the simplex points - be x0,x1. Let the centroid be c=(x0+x1)/2.0 and the reflection point be: - reflection = c + alpha*(c-x1) = (x0+x1)*(1+alpha)/2 - x1 - The problem is, if we set alpha = 1.0, then the x1's cancel out and the - reflection point becomes just reflection=x0, which also happens to be the - second best point, meaning we will use it. So in a single step of the - algorithm the simplex becomes singular. - """ - - def __init__(self, - alpha=2.0, - gamma=2.0, - beta=0.5, - sigma=0.5, - *args, **kwargs): - self.alpha = alpha - self.gamma = gamma - self.beta = beta - self.sigma = sigma - super(NelderMead, self).__init__(*args, **kwargs) - - @classmethod - def get_hyper_parameters(cls): - return ['alpha', 'gamma', 'beta', 'sigma'] - - - def main_generator(self): - objective = self.objective - driver = self.driver - - # test the entire initial simplex - self.simplex_points = list(map(driver.get_configuration, - self.initial_simplex())) - - if len(self.simplex_points) <= 1: - log.warning("only 1 point in simplex, will not use %s", self.name) - return - - log.debug("initial points") - for p in self.simplex_points: - self.yield_nonblocking(p) - yield None # wait until results are ready - - while not self.convergence_criterea(): - # next steps assume this ordering - self.simplex_points.sort(cmp=objective.compare) - # set limit from worst point - self.limit = objective.limit_from_config(self.simplex_points[-1]) - self.centroid = self.calculate_centroid() - if 
log.isEnabledFor(logging.DEBUG): - self.debug_log() - - reflection = self.reflection_point() - yield reflection - - if objective.lt(reflection, self.simplex_points[0]): - #expansion case - expansion = self.expansion_point(reflection) - yield expansion - - if objective.lt(expansion, reflection): - log.debug("using expansion point") - self.simplex_points[-1] = expansion - else: - log.debug("using reflection point (considered expansion)") - self.simplex_points[-1] = reflection - - elif objective.lt(reflection, self.simplex_points[1]): - #reflection case - log.debug("using reflection point") - self.simplex_points[-1] = reflection - else: - # contraction case - if objective.lte(reflection, self.simplex_points[-1]): - # outside contraction - contract_base = reflection - else: - # inside contraction - contract_base = self.simplex_points[-1] - - contraction = self.contraction_point(contract_base) - yield contraction - - if objective.lte(contraction, contract_base): - log.debug("using contraction point") - self.simplex_points[-1] = contraction - else: - #reduction case - log.debug("performing shrink reduction") - self.perform_shrink_reduction() - for p in self.simplex_points: - self.yield_nonblocking(p) - yield None # wait until results are ready - - def reflection_point(self): - """ - reflect worst point across centroid - """ - return self.driver.get_configuration( - self.linear_point(self.centroid, - self.simplex_points[-1].data, - self.alpha)) - - def expansion_point(self, reflection): - """ - reflect worst point across centroid more (by default 2x as much) - """ - return self.driver.get_configuration( - self.linear_point(self.centroid, - reflection.data, - -self.gamma)) - - def contraction_point(self, contract_base): - """ - reflect worst point across centroid less - """ - return self.driver.get_configuration( - self.linear_point(self.centroid, - contract_base.data, - -self.beta)) - - def perform_shrink_reduction(self): - """ - shrink the simplex in size by sigma=1/2 
(default), moving it closer to the - best point - """ - for i in xrange(1, len(self.simplex_points)): - self.simplex_points[i] = self.driver.get_configuration( - self.linear_point(self.simplex_points[0].data, - self.simplex_points[i].data, - -self.sigma)) - - -class Torczon(SimplexTechnique): - """ - Torczon multi-directional search algorithm. - - Based on description of method on page 85 of - 'Noisy Optimization With Evolution Strategies' by Dirk V. Arnold. - """ - - def __init__(self, - alpha=1.0, - gamma=2.0, - beta=0.5, - *args, **kwargs): - self.alpha = alpha - self.gamma = gamma - self.beta = beta - super(Torczon, self).__init__(*args, **kwargs) - - @classmethod - def get_hyper_parameters(cls): - return ['alpha', 'gamma', 'beta'] - - - def main_generator(self): - objective = self.objective - driver = self.driver - - # test the entire initial simplex - self.simplex_points = list(map(driver.get_configuration, - self.initial_simplex())) - if len(self.simplex_points) <= 1: - log.warning("only 1 point in simplex, will not use %s", self.name) - return - - log.debug("initial points") - for p in self.simplex_points: - self.yield_nonblocking(p) - yield None # wait until results are ready - self.simplex_points.sort(cmp=objective.compare) - - while not self.convergence_criterea(): - # set limit from worst point - self.limit = objective.limit_from_config(self.simplex_points[-1]) - - if log.isEnabledFor(logging.DEBUG): - self.debug_log() - - reflected = self.reflected_simplex() - yield None # wait until results are ready - reflected.sort(cmp=objective.compare) - - # this next condition implies reflected[0] < simplex_points[0] since - # reflected is sorted and contains simplex_points[0] (saves a db query) - if reflected[0] is not self.simplex_points[0]: - expanded = self.expanded_simplex() - yield None # wait until results are ready - expanded.sort(cmp=objective.compare) - - if objective.lt(expanded[0], reflected[0]): - log.debug("expansion performed") - 
self.simplex_points = expanded - else: - log.debug("reflection performed") - self.simplex_points = reflected - else: - contracted = self.contracted_simplex() - yield None # wait until results are ready - contracted.sort(cmp=objective.compare) - - log.debug("contraction performed") - self.simplex_points = contracted - - def scaled_simplex(self, scale): - """ - assumes self.simplex_points[0] is best point and returns a new simplex - reflected across self.simplex_points[0] by scale - """ - simplex = list(self.simplex_points) # shallow copy - for i in xrange(1, len(simplex)): - simplex[i] = self.driver.get_configuration( - self.linear_point(simplex[0].data, simplex[i].data, scale)) - self.yield_nonblocking(simplex[i]) - return simplex - - def reflected_simplex(self): - return self.scaled_simplex(self.alpha) - - def expanded_simplex(self): - return self.scaled_simplex(self.gamma) - - def contracted_simplex(self): - return self.scaled_simplex(-self.beta) - - -class RandomNelderMead(RandomInitialMixin, NelderMead): - pass - - -class RightNelderMead(RightInitialMixin, NelderMead): - pass - - -class RegularNelderMead(RegularInitialMixin, NelderMead): - pass - - -class RandomTorczon(RandomInitialMixin, Torczon): - pass - - -class RightTorczon(RightInitialMixin, Torczon): - pass - - -class RegularTorczon(RegularInitialMixin, Torczon): - pass - - -class MultiNelderMead(RecyclingMetaTechnique): - def __init__(self): - super(MultiNelderMead, self).__init__([RightNelderMead, RandomNelderMead, - RegularNelderMead]) - - -class MultiTorczon(RecyclingMetaTechnique): - def __init__(self): - super(MultiTorczon, self).__init__([RightTorczon, RandomTorczon, - RegularTorczon]) - - -register(RandomNelderMead()) -register(RegularNelderMead()) -register(RightNelderMead()) -register(MultiNelderMead()) -register(RandomTorczon()) -register(RegularTorczon()) -register(RightTorczon()) -register(MultiTorczon()) - - - - - - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simulatedannealing.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simulatedannealing.py deleted file mode 100644 index 45b315f2e6bbceda2822ae72623e8c0032afe66b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/simulatedannealing.py +++ /dev/null @@ -1,133 +0,0 @@ -from opentuner.search import technique -import math -import random -#Default interval steps for cooling schedules -DEFAULT_INTERVAL = 100 - -#Pseudo-annealing - no relative energy input into acceptance function -class PseudoAnnealingSearch(technique.SequentialSearchTechnique): - def __init__(self, - temps = [30,0], #temperature schedule - intervals = [], #duration schedule - loop = True, #Do we loop the schedule if we reach the end? - *pargs, **kwargs): - #fill intervals sufficiently - ext_intervals = list(intervals) - for i in range(len(temps)-len(intervals)-1): - ext_intervals.append(DEFAULT_INTERVAL) - - #create temperature schedule (list of temps) - cool_schedule = [temps[0]] - for i in range(len(temps)-1): - step = (float(temps[i+1]) - temps[i])/ext_intervals[i] - for j in range(ext_intervals[i]): - cool_schedule.append(max(cool_schedule[-1] + step,0)) - - self.cool_schedule = cool_schedule - self.loop = loop - self.scaling = 50 #scaling of acceptance function - - super(PseudoAnnealingSearch,self).__init__(*pargs,**kwargs) - - - def main_generator(self): - objective = self.objective - driver = self.driver - manipulator = self.manipulator - - #Start in a random spot - state = driver.get_configuration(manipulator.random()) - yield state - #schedule counter - counter = 0 - max_time = len(self.cool_schedule)-1 - #Check whether relative objective implemented - has_rel = objective.relative(state,state) is not None - has_rel=False - - while True: - #Determine temperature - temp = self.cool_schedule[min(counter,max_time)] - #scale stepsize with temp and 
time (arbitrary) - step_size = math.exp(-(20 + counter/100)/(temp+ 1)) - - #get candidate neighbors using manipulator - points = list() - points.append(state) - for param in manipulator.parameters(state.data): - if param.is_primitive(): - # get current value of param, scaled to be in range [0.0, 1.0] - unit_value = param.get_unit_value(state.data) - if unit_value > 0.0: - # produce new config with param set step_size lower - down_cfg = manipulator.copy(state.data) - param.set_unit_value(down_cfg, max(0.0, unit_value - step_size*random.random())) - down_cfg = driver.get_configuration(down_cfg) - self.yield_nonblocking(down_cfg) - points.append(down_cfg) - - if unit_value < 1.0: - # produce new config with param set step_size higher - up_cfg = manipulator.copy(state.data) - param.set_unit_value(up_cfg, min(1.0, unit_value + step_size*random.random())) - up_cfg = driver.get_configuration(up_cfg) - self.yield_nonblocking(up_cfg) - points.append(up_cfg) - else: # ComplexParameter - for mutate_function in param.manipulators(state.data): - cfg = manipulator.copy(state.data) - mutate_function(cfg) - cfg = driver.get_configuration(cfg) - self.yield_nonblocking(cfg) - points.append(cfg) - yield None # wait for all results - - #Relative comparison implemented - if has_rel: - while True: - if len(points) == 0: - state = driver.best_result.configuration - break - candidate = points.pop(random.randint(0,len(points)-1)) - #compare to global best - if random.random() < AcceptanceFunction(1, objective.relative(candidate,driver.best_result.configuration), temp, self.scaling): - state = candidate - break - #No relative compare - else: - #sort points by "energy" (quality) - points.sort(cmp=objective.compare) - - #Make decision about changing state - #probability picking next-best state is exp^(-1/temp) - #repeat and cycle to get state p-dist resembling this - sel = 0 - while AcceptanceFunction(0,1,temp,1)>random.random(): - sel += 1 - state = points[sel%len(points)] - - #switch to the 
global best if temperature is low (i.e. we aren't moving much) - if AcceptanceFunction(0,1,temp,1)< .0001 and objective.lt(driver.best_result.configuration, state): - state = driver.best_result.configuration - - #update counter - counter +=1 - if counter>max_time and self.loop: - counter=counter-max_time - - -#Acceptance probability function for annealing -def AcceptanceFunction(e,e_new,temp,scaling): - #Standard acceptance probability function using relative "goodness" - if e>=e_new: - return 1 - if temp == 0: - return 0 - if scaling*(e_new-e)/temp > 10: - #for practical purposes, probability is too low. - return 0 - return math.exp(scaling*(e-e_new)/temp) - - -#register technique -technique.register(PseudoAnnealingSearch()) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/technique.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/technique.py deleted file mode 100644 index 849391df9bb37454301c90a520fbbe6b5025c683..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/search/technique.py +++ /dev/null @@ -1,358 +0,0 @@ -import abc -import argparse -import logging -import os -import random -import sys - -from importlib import import_module -from datetime import datetime -from fn import _ - -from opentuner.resultsdb.models import * -from plugin import SearchPlugin - -log = logging.getLogger(__name__) -#log.setLevel(logging.DEBUG) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--technique','-t', action='append', - help="which technique to use") -argparser.add_argument('--list-techniques','-lt', action='store_true', - help="list techniques available and exit") -argparser.add_argument('--generate-bandit-technique','-gbt', action='store_true', - help="randomly generate a bandit to use") - -class SearchTechniqueBase(object): - """ - abstract base class for search techniques, with minimal interface - """ - __metaclass__ = abc.ABCMeta - - 
def __init__(self, name = None): - super(SearchTechniqueBase, self).__init__() - if name: - self.name = name - else: - self.name = self.default_name() - - def is_ready(self): - """test if enough data has been gathered to use this technique""" - return True - - def default_name(self): - """name of this SearchTechnique uses for display/accounting""" - return self.__class__.__name__ - - def handle_requested_result(self, result): - """called for each new Result(), requested by this technique""" - pass - - @abc.abstractmethod - def set_driver(self, driver): - """called at start of tuning process""" - return - - @abc.abstractmethod - def desired_result(self): - """ - return at most count resultsdb.models.DesiredResult objects based on past - performance - """ - return - -class SearchTechnique(SearchPlugin, SearchTechniqueBase): - """ - a search search technique with basic utility functions - """ - - def __init__(self, *pargs, **kwargs): - super(SearchTechnique, self).__init__(*pargs, **kwargs) - self.driver = None - self.manipulator = None - self.objective = None - self.request_count = 0 - - def set_driver(self, driver): - super(SearchTechnique, self).set_driver(driver) - self.manipulator = driver.manipulator - self.objective = driver.objective - driver.add_plugin(self) - - def desired_result(self): - """ - create and return a resultsdb.models.DesiredResult - returns None if no desired results and False if waiting for results - """ - cfg = self.desired_configuration() - if cfg is None: - return None - if cfg is False: - return False - if type(cfg) is Configuration: - config = cfg - else: - config = self.driver.get_configuration(cfg) - desired = DesiredResult(configuration=config, - requestor=self.name, - generation=self.driver.generation, - request_date=datetime.now(), - tuning_run=self.driver.tuning_run) - if hasattr(self, 'limit'): - desired.limit = self.limit - self.driver.register_result_callback(desired, self.handle_requested_result) - self.request_count += 1 - 
return desired - - @abc.abstractmethod - def desired_configuration(self): - """ - return a cfg that we should test - given a ConfigurationManipulator and SearchDriver - return None if there are no configurations to test - return False if waiting for results - """ - return dict() - - def handle_requested_result(self, result): - """called for each new Result(), regardless of who requested it""" - pass - - def default_generated_name(self): - """ The default generated name for this technique """ - return self.base_name() - - def use_default_generated_name(self): - """ set the name of this technique to the default generated name """ - self.name = self.default_generated_name() - - def base_name(self): - """ - Return the base name of this technique with form - classname;hyperparam1,v1;hyperparam2,v2 ... - where hyperparams are taken in order from get_hyper_parameters() - - Should only be called after this technique has finished initializing. - """ - out = [self.__class__.__name__] - for hyper_parameter in self.get_hyper_parameters(): - # get hyperparam,v as a string and append - try: - out.append(hyper_parameter + ',' + str(getattr(self, hyper_parameter))) - except AttributeError: - log.error("Uninitialized hyper-parameter %s for technique %s.", - hyper_parameter, self.__class__.__name__) - - return ';'.join(out) - - @classmethod - def get_hyper_parameters(cls): - """ - return a list of hyper-parameters names for this technique - - Name strings must match the corresponding attribute with the hyper-parameter - value on technique instances. Names should also match the key word argument - used when initializing an instance. Hyperparameters should only take literal - values. 
- - For example, given hyper parameter "mutation_rate", then the __init__ method - should have 'mutation_rate' as a key word argument and later have the line - self.mutation_rate = mutation_rate - """ - return [] - - @classmethod - def generate_technique(cls, manipulator=None, *args, **kwargs): - """ return a new technique based off this instance """ - t = cls(*args, **kwargs) - t.use_default_generated_name() - return t - -class PureRandom(SearchTechnique): - """ - request configurations completely randomly - """ - def desired_configuration(self): - return self.manipulator.random() - -class AsyncProceduralSearchTechnique(SearchTechnique): - def __init__(self, *pargs, **kwargs): - super(AsyncProceduralSearchTechnique, self).__init__(*pargs, **kwargs) - self.gen = None - self.done = False - self.latest_results = [] - - def call_main_generator(self): - """passthrough (used in subclasses)""" - return self.main_generator() - - def desired_configuration(self): - if self.gen is None: - log.debug("%s: creating generator", self.name) - self.gen = self.call_main_generator() - if not self.done: - try: - return self.gen.next() - except StopIteration: - log.debug("%s: generator finished", self.name) - self.done = True - return None - - @abc.abstractmethod - def main_generator(self): - """ - custom generator to conduct this search, should: - yield config - to request tests and call driver.get_results() to read the results - - in AsyncProceduralSearchTechnique results are ready at an undefined - time (`yield False` to stall and wait for them) - - in SequentialSearchTechnique results are ready after the yield - """ - pass - - def is_ready(self): - return not self.done - -class SequentialSearchTechnique(AsyncProceduralSearchTechnique): - def __init__(self, novelty_threshold=50, reset_threshold=500, *pargs, **kwargs): - super(SequentialSearchTechnique, self).__init__(*pargs, **kwargs) - self.pending_tests = [] - self.novelty_threshold = novelty_threshold - 
self.rounds_since_novel_request = 0 - self.reset_threshold = reset_threshold - - def yield_nonblocking(self, cfg): - """ - within self.main_generator() act like `yield cfg`, but don't wait for the - results until the following yield (spawn/sync style) - """ - if cfg: - self.pending_tests.append(cfg) - - def call_main_generator(self): - """insert waits for results after every yielded item""" - subgen = self.main_generator() - self.rounds_since_novel_request = 0 - while True: - self.rounds_since_novel_request += 1 - if (self.rounds_since_novel_request % self.novelty_threshold) == 0: - log.warning("%s has not requested a new result for %d rounds", - self.name, self.rounds_since_novel_request) - if (self.rounds_since_novel_request > self.reset_threshold): - log.warning("%s is being reset", self.name) - subgen = self.main_generator() - self.rounds_since_novel_request = 0 - yield None # give other techniques a shot - try: - p = subgen.next() - if p: - self.pending_tests.append(p) - except StopIteration: - return - finally: - for p in self.pending_tests: - if not self.driver.has_results(p): - self.rounds_since_novel_request = 0 - yield p - - # wait for all pending_tests to have results - c = 0 - while self.pending_tests: - log.debug("%s: waiting for %d pending tests", - self.name, len(self.pending_tests)) - c += 1 - if (c % 100) == 0: - log.error("%s: still waiting for %d pending tests (c=%d)", - self.name, len(self.pending_tests), c) - - self.pending_tests = filter(lambda x: not self.driver.has_results(x), - self.pending_tests) - if self.pending_tests: - self.rounds_since_novel_request = 0 - yield False # wait - -#list of all techniques -the_registry = list() - -#list of technique generators -the_generator_registry = list() - -def register(t): - the_registry.append(t) - -def register_generator(cls, generator_weight=1.0, *args, **kwargs): - """ - register a technique generator - a tuple of (technique class, args, kwargs) - where args and kwargs will be passed into the 
generate_technique classmethod - - with specified probability weight when randomly choosing a generator - - :param cls: a technique class to use as a generator - :param generator_weight: probability weighting when randomly choosing a generator - :param args: arguments to pass into generate_technique class method - :param kwargs: arguments to pass into generate_technique class method - """ - the_generator_registry.append(((cls, args, kwargs), generator_weight)) - -register(PureRandom()) - -def get_random_generator_technique(generators=None, manipulator=None): - """ - Takes in a sequence of ((generator, args, kwargs), weight) tuples. - Returns a random generated technique info tuple - - :param generators: optional argument to avoid repeated getting of generators - :param manipulator: manipulator to pass to generate_technique class method. - """ - if generators is None: - techniques, generators = all_techniques() - g, args, kwargs = weighted_choice(generators) - return g.generate_technique(manipulator, *args, **kwargs) - - -def weighted_choice(choices): - """ takes in a sequence of (choice, weight) tuples and randomly returns one """ - total = sum(w for c, w in choices) - r = random.uniform(0, total) - upto = 0 - for c, w in choices: - upto += w - if upto > r: - return c - return random.choice([c for c, w in choices]) - - -def all_techniques(): - #import all modules in search to ensure techniques are Registered - for f in sorted(os.listdir(os.path.dirname(__file__))): - m = re.match(r'^(.*)[.]py$', f) - if m: - import_module('opentuner.search.'+m.group(1)) - - return the_registry, the_generator_registry - -def get_enabled(args): - techniques, generators = all_techniques() - if args.list_techniques: - for t in techniques: - print t.name - sys.exit(0) - - if not args.technique: - # no techniques specified, default technique - args.technique = ['AUCBanditMetaTechniqueA'] - - for unknown in set(args.technique) - set(map(_.name, techniques)): - log.error('unknown technique 
%s', unknown) - raise Exception('Unknown technique: --technique={}'.format(unknown)) - - return [t for t in techniques if t.name in args.technique] - -def get_root(args): - from metatechniques import RoundRobinMetaSearchTechnique - enabled = get_enabled(args) - if len(enabled) == 1: - return enabled[0] - return RoundRobinMetaSearchTechnique(get_enabled(args)) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/tuningrunmain.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/tuningrunmain.py deleted file mode 100644 index 9bcf1b5270286ee405373822d3919ae8854a24c3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/tuningrunmain.py +++ /dev/null @@ -1,224 +0,0 @@ -# vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab autoindent smarttab -import argparse -import copy -import inspect -import logging -import math -import os -import socket -import sys -import time -import uuid -from datetime import datetime - -from opentuner import resultsdb -from opentuner.search.driver import SearchDriver -from opentuner.measurement.driver import MeasurementDriver - -log = logging.getLogger(__name__) - -argparser = argparse.ArgumentParser(add_help=False) -argparser.add_argument('--label', - help="name for the TuningRun") -argparser.add_argument('--print-search-space-size', action='store_true', - help="Print out the estimated size of the search space and exit") -argparser.add_argument('--database', - help=("database to store tuning results in, see: " - "http://docs.sqlalchemy.org/en/rel_0_8/core/engines.html#database-urls")) -argparser.add_argument('--print-params','-pp',action='store_true', - help='show parameters of the configuration being tuned') - - -class CleanStop(Exception): - pass - - -class LogFormatter(logging.Formatter): - def format(self, record): - record.relativeCreated /= 1000.0 - try: - # python 2.7 - return super(LogFormatter, self).format(record) - except: - # python 2.6 - return 
_OldFormatter.format(self, record) - - -_OldFormatter = logging.Formatter -logging.Formatter = LogFormatter - -try: - # python 2.7 - from logging.config import dictConfig -except: - # python 2.6 - from .utils.dictconfig import dictConfig - -the_logging_config = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': {'console': {'format': '[%(relativeCreated)6.0fs] ' - '%(levelname)7s %(name)s: ' - '%(message)s'}, - 'file': {'format': '[%(asctime)-15s] ' - '%(levelname)7s %(name)s: ' - '%(message)s ' - '@%(filename)s:%(lineno)d'}}, - 'handlers': {'console': {'class': 'logging.StreamHandler', - 'formatter': 'console', - 'level': 'INFO'}, - 'file': {'class': 'logging.FileHandler', - 'filename': 'opentuner.log', - 'formatter': 'file', - 'level': 'WARNING'}}, - 'loggers': {'': {'handlers': ['console', 'file'], - 'level': 'INFO', - 'propagate': True}}} - - -def init_logging(): - dictConfig(the_logging_config) - global init_logging - init_logging = lambda: None - - -class TuningRunMain(object): - def __init__(self, - measurement_interface, - args, - search_driver=SearchDriver, - measurement_driver=MeasurementDriver): - init_logging() - - manipulator = measurement_interface.manipulator() - if args.print_search_space_size: - print "10^{%.2f}" % math.log(manipulator.search_space_size(), 10) - sys.exit(0) - # show internal parameter representation - if args.print_params: - cfg = manipulator.seed_config() - d = manipulator.parameters_dict(cfg) - params_dict ={} - for k in d: - cls = d[k].__class__.__name__ - p = (k, d[k].search_space_size()) - if cls in params_dict: - params_dict[cls].append(p) - else: - params_dict[cls] = [p] - for k in params_dict: - print k, params_dict[k] - print - sys.exit(0) - - input_manager = measurement_interface.input_manager() - objective = measurement_interface.objective() - - if not args.database: - #args.database = 'sqlite://' #in memory - if not os.path.isdir('opentuner.db'): - os.mkdir('opentuner.db') - args.database = 
'sqlite:///' + os.path.join('opentuner.db', - socket.gethostname() + '.db') - - if '://' not in args.database: - args.database = 'sqlite:///' + args.database - - if not args.label: - args.label = 'unnamed' - - #self.fake_commit = ('sqlite' in args.database) - self.fake_commit = True - - self.args = args - - self.engine, self.Session = resultsdb.connect(args.database) - self.session = self.Session() - self.tuning_run = None - self.search_driver_cls = search_driver - self.measurement_driver_cls = measurement_driver - self.measurement_interface = measurement_interface - self.input_manager = input_manager - self.manipulator = manipulator - self.objective = objective - self.objective_copy = copy.copy(objective) - self.last_commit_time = time.time() - - def init(self): - if self.tuning_run is None: - program_version = (self.measurement_interface - .db_program_version(self.session)) - self.session.flush() - self.measurement_interface.prefix_hook(self.session) - self.tuning_run = ( - resultsdb.models.TuningRun( - uuid=uuid.uuid4().hex, - name=self.args.label, - args=self.args, - start_date=datetime.now(), - program_version=program_version, - objective=self.objective_copy, - )) - self.session.add(self.tuning_run) - - driver_kwargs = { - 'args': self.args, - 'input_manager': self.input_manager, - 'manipulator': self.manipulator, - 'measurement_interface': self.measurement_interface, - 'objective': self.objective, - 'session': self.session, - 'tuning_run_main': self, - 'tuning_run': self.tuning_run, - 'extra_seeds': self.measurement_interface.seed_configurations(), - 'extra_criteria': self.measurement_interface.extra_convergence_criteria - } - - self.search_driver = self.search_driver_cls(**driver_kwargs) - - self.measurement_driver = self.measurement_driver_cls(**driver_kwargs) - self.measurement_interface.set_driver(self.measurement_driver) - self.input_manager.set_driver(self.measurement_driver) - - self.tuning_run.machine_class = 
self.measurement_driver.get_machine_class() - self.tuning_run.input_class = self.input_manager.get_input_class() - - def commit(self, force=False): - if (force or not self.fake_commit or - time.time() - self.last_commit_time > 30): - self.session.commit() - self.last_commit_time = time.time() - else: - self.session.flush() - - def main(self): - self.init() - try: - self.tuning_run.state = 'RUNNING' - self.commit(force=True) - self.search_driver.main() - if self.search_driver.best_result: - self.measurement_interface.save_final_config( - self.search_driver.best_result.configuration) - self.tuning_run.final_config = self.search_driver.best_result.configuration - self.tuning_run.state = 'COMPLETE' - except: - self.tuning_run.state = 'ABORTED' - raise - finally: - self.tuning_run.end_date = datetime.now() - self.commit(force=True) - self.session.close() - - def results_wait(self, generation): - """called by search_driver to wait for results""" - #single process version: - self.measurement_interface.pre_process() - self.measurement_driver.process_all() - self.measurement_interface.post_process() - -def main(interface, args, *pargs, **kwargs): - if inspect.isclass(interface): - interface = interface(args=args, *pargs, **kwargs) - return TuningRunMain(interface, args).main() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/adddeps.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/adddeps.py deleted file mode 100644 index e2fc74064b605e92367907a7641442df0cf97cd9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/adddeps.py +++ /dev/null @@ -1,13 +0,0 @@ - -import sys -from os.path 
import normpath, realpath, dirname, join, isfile - -project_root = normpath(join(dirname(realpath(__file__)), '../..')) - -if 'venv' not in ','.join(sys.path): - venv_activate = join(project_root, 'venv/bin/activate_this.py') - if isfile(venv_activate): - execfile(venv_activate, dict(__file__=venv_activate)) - -sys.path.insert(0, project_root) - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/compactdb.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/compactdb.py deleted file mode 100755 index 25a70d2d3b2658e877aa51a1462d5a9366635057..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/compactdb.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python - -if __name__ == '__main__': - import adddeps - -import argparse -import logging -import sys - -import opentuner -from opentuner.resultsdb.models import * - -log = logging.getLogger('opentuner.utils.compactdb') - -argparser = argparse.ArgumentParser() -argparser.add_argument('database') -argparser.add_argument('--level', type=int, default=2) - - -def main(args): - if '://' not in args.database: - args.database = "sqlite:///" + args.database - engine, Session = opentuner.resultsdb.connect(args.database) - session = Session() - - config_count = session.query(Configuration).count() - # result_count = session.query(Result).count() - # desired_result_count = session.query(DesiredResult).count() - - if args.level >= 1: - q = (session.query(Configuration) - .filter(~Configuration.id.in_(session.query(Result.configuration_id) - .filter_by(was_new_best=True) - .subquery())) - .filter(Configuration.data != None)) - - log.info("%s: compacted %d of %d Configurations", - args.database, - q.update({'data': None}, False), - config_count) - session.commit() - - if args.level >= 2: - session.execute('VACUUM;') - session.commit() - - log.info('done') - - -if __name__ == '__main__': - opentuner.tuningrunmain.init_logging() - 
sys.exit(main(argparser.parse_args())) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/dictconfig.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/dictconfig.py deleted file mode 100644 index 7b835a41084d1c24f40002e93940c574b60bb696..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/dictconfig.py +++ /dev/null @@ -1,544 +0,0 @@ -# This is a copy of the Python logging.config.dictconfig module, -# reproduced with permission. It is provided here for backwards -# compatibility for Python versions prior to 2.7. -# -# Copyright 2009-2010 by Vinay Sajip. All Rights Reserved. -# -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies and that -# both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of Vinay Sajip -# not be used in advertising or publicity pertaining to distribution -# of the software without specific, written prior permission. -# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL -# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- -import logging.handlers -import re -import sys -import types - - -IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) - -def valid_ident(s): - m = IDENTIFIER.match(s) - if not m: - raise ValueError('Not a valid Python identifier: %r' % s) - return True - -# -# This function is defined in logging only in recent versions of Python -# -try: - from logging import _checkLevel -except ImportError: - def _checkLevel(level): - if isinstance(level, int): - rv = level - elif str(level) == level: - if level not in logging._levelNames: - raise ValueError('Unknown level: %r' % level) - rv = logging._levelNames[level] - else: - raise TypeError('Level not an integer or a ' - 'valid string: %r' % level) - return rv - -# The ConvertingXXX classes are wrappers around standard Python containers, -# and they serve to convert any suitable values in the container. The -# conversion converts base dicts, lists and tuples to their wrapped -# equivalents, whereas strings which match a conversion format are converted -# appropriately. -# -# Each wrapper should have a configurator attribute holding the actual -# configurator to use for conversion. 
- -class ConvertingDict(dict): - """A converting dictionary wrapper.""" - - def __getitem__(self, key): - value = dict.__getitem__(self, key) - result = self.configurator.convert(value) - #If the converted value is different, save for next time - if value is not result: - self[key] = result - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - result.key = key - return result - - def get(self, key, default=None): - value = dict.get(self, key, default) - result = self.configurator.convert(value) - #If the converted value is different, save for next time - if value is not result: - self[key] = result - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - result.key = key - return result - - def pop(self, key, default=None): - value = dict.pop(self, key, default) - result = self.configurator.convert(value) - if value is not result: - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - result.key = key - return result - -class ConvertingList(list): - """A converting list wrapper.""" - def __getitem__(self, key): - value = list.__getitem__(self, key) - result = self.configurator.convert(value) - #If the converted value is different, save for next time - if value is not result: - self[key] = result - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - result.key = key - return result - - def pop(self, idx=-1): - value = list.pop(self, idx) - result = self.configurator.convert(value) - if value is not result: - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - return result - -class ConvertingTuple(tuple): - """A converting tuple wrapper.""" - def __getitem__(self, key): - value = tuple.__getitem__(self, key) - result = self.configurator.convert(value) - if value is not result: - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): 
- result.parent = self - result.key = key - return result - -class BaseConfigurator(object): - """ - The configurator base class which defines some useful defaults. - """ - - CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$') - - WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') - DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') - INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') - DIGIT_PATTERN = re.compile(r'^\d+$') - - value_converters = { - 'ext' : 'ext_convert', - 'cfg' : 'cfg_convert', - } - - # We might want to use a different one, e.g. importlib - importer = __import__ - - def __init__(self, config): - self.config = ConvertingDict(config) - self.config.configurator = self - - def resolve(self, s): - """ - Resolve strings to objects using standard import and attribute - syntax. - """ - name = s.split('.') - used = name.pop(0) - try: - found = self.importer(used) - for frag in name: - used += '.' + frag - try: - found = getattr(found, frag) - except AttributeError: - self.importer(used) - found = getattr(found, frag) - return found - except ImportError: - e, tb = sys.exc_info()[1:] - v = ValueError('Cannot resolve %r: %s' % (s, e)) - v.__cause__, v.__traceback__ = e, tb - raise v - - def ext_convert(self, value): - """Default converter for the ext:// protocol.""" - return self.resolve(value) - - def cfg_convert(self, value): - """Default converter for the cfg:// protocol.""" - rest = value - m = self.WORD_PATTERN.match(rest) - if m is None: - raise ValueError("Unable to convert %r" % value) - else: - rest = rest[m.end():] - d = self.config[m.groups()[0]] - #print d, rest - while rest: - m = self.DOT_PATTERN.match(rest) - if m: - d = d[m.groups()[0]] - else: - m = self.INDEX_PATTERN.match(rest) - if m: - idx = m.groups()[0] - if not self.DIGIT_PATTERN.match(idx): - d = d[idx] - else: - try: - n = int(idx) # try as number first (most likely) - d = d[n] - except TypeError: - d = d[idx] - if m: - rest = rest[m.end():] - else: - raise ValueError('Unable 
to convert ' - '%r at %r' % (value, rest)) - #rest should be empty - return d - - def convert(self, value): - """ - Convert values to an appropriate type. dicts, lists and tuples are - replaced by their converting alternatives. Strings are checked to - see if they have a conversion format and are converted if they do. - """ - if not isinstance(value, ConvertingDict) and isinstance(value, dict): - value = ConvertingDict(value) - value.configurator = self - elif not isinstance(value, ConvertingList) and isinstance(value, list): - value = ConvertingList(value) - value.configurator = self - elif not isinstance(value, ConvertingTuple) and\ - isinstance(value, tuple): - value = ConvertingTuple(value) - value.configurator = self - return value - - def configure_custom(self, config): - """Configure an object with a user-supplied factory.""" - c = config.pop('()') - if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType: - c = self.resolve(c) - props = config.pop('.', None) - # Check for valid identifiers - kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) - result = c(**kwargs) - if props: - for name, value in props.items(): - setattr(result, name, value) - return result - - def as_tuple(self, value): - """Utility function which converts lists to tuples.""" - if isinstance(value, list): - value = tuple(value) - return value - -class DictConfigurator(BaseConfigurator): - """ - Configure logging using a dictionary-like object to describe the - configuration. 
- """ - - def configure(self): - """Do the configuration.""" - - config = self.config - if 'version' not in config: - raise ValueError("dictionary doesn't specify a version") - if config['version'] != 1: - raise ValueError("Unsupported version: %s" % config['version']) - incremental = config.pop('incremental', False) - EMPTY_DICT = {} - logging._acquireLock() - try: - if incremental: - handlers = config.get('handlers', EMPTY_DICT) - # incremental handler config only if handler name - # ties in to logging._handlers (Python 2.7) - if sys.version_info[:2] == (2, 7): - for name in handlers: - if name not in logging._handlers: - raise ValueError('No handler found with ' - 'name %r' % name) - else: - try: - handler = logging._handlers[name] - handler_config = handlers[name] - level = handler_config.get('level', None) - if level: - handler.setLevel(_checkLevel(level)) - except StandardError as e: - raise ValueError('Unable to configure handler ' - '%r: %s' % (name, e)) - loggers = config.get('loggers', EMPTY_DICT) - for name in loggers: - try: - self.configure_logger(name, loggers[name], True) - except StandardError as e: - raise ValueError('Unable to configure logger ' - '%r: %s' % (name, e)) - root = config.get('root', None) - if root: - try: - self.configure_root(root, True) - except StandardError as e: - raise ValueError('Unable to configure root ' - 'logger: %s' % e) - else: - disable_existing = config.pop('disable_existing_loggers', True) - - logging._handlers.clear() - del logging._handlerList[:] - - # Do formatters first - they don't refer to anything else - formatters = config.get('formatters', EMPTY_DICT) - for name in formatters: - try: - formatters[name] = self.configure_formatter( - formatters[name]) - except StandardError as e: - raise ValueError('Unable to configure ' - 'formatter %r: %s' % (name, e)) - # Next, do filters - they don't refer to anything else, either - filters = config.get('filters', EMPTY_DICT) - for name in filters: - try: - filters[name] = 
self.configure_filter(filters[name]) - except StandardError as e: - raise ValueError('Unable to configure ' - 'filter %r: %s' % (name, e)) - - # Next, do handlers - they refer to formatters and filters - # As handlers can refer to other handlers, sort the keys - # to allow a deterministic order of configuration - handlers = config.get('handlers', EMPTY_DICT) - for name in sorted(handlers): - try: - handler = self.configure_handler(handlers[name]) - handler.name = name - handlers[name] = handler - except StandardError as e: - raise ValueError('Unable to configure handler ' - '%r: %s' % (name, e)) - # Next, do loggers - they refer to handlers and filters - - #we don't want to lose the existing loggers, - #since other threads may have pointers to them. - #existing is set to contain all existing loggers, - #and as we go through the new configuration we - #remove any which are configured. At the end, - #what's left in existing is the set of loggers - #which were in the previous configuration but - #which are not in the new configuration. - root = logging.root - existing = list(root.manager.loggerDict) - #The list needs to be sorted so that we can - #avoid disabling child loggers of explicitly - #named loggers. With a sorted list it is easier - #to find the child loggers. - existing.sort() - #We'll keep the list of existing loggers - #which are children of named loggers here... - child_loggers = [] - #now set up the new ones... - loggers = config.get('loggers', EMPTY_DICT) - for name in loggers: - if name in existing: - i = existing.index(name) - prefixed = name + "." 
- pflen = len(prefixed) - num_existing = len(existing) - i = i + 1 # look at the entry after name - while (i < num_existing) and\ - (existing[i][:pflen] == prefixed): - child_loggers.append(existing[i]) - i = i + 1 - existing.remove(name) - try: - self.configure_logger(name, loggers[name]) - except StandardError as e: - raise ValueError('Unable to configure logger ' - '%r: %s' % (name, e)) - - #Disable any old loggers. There's no point deleting - #them as other threads may continue to hold references - #and by disabling them, you stop them doing any logging. - #However, don't disable children of named loggers, as that's - #probably not what was intended by the user. - for log in existing: - logger = root.manager.loggerDict[log] - if log in child_loggers: - logger.level = logging.NOTSET - logger.handlers = [] - logger.propagate = True - elif disable_existing: - logger.disabled = True - - # And finally, do the root logger - root = config.get('root', None) - if root: - try: - self.configure_root(root) - except StandardError as e: - raise ValueError('Unable to configure root ' - 'logger: %s' % e) - finally: - logging._releaseLock() - - def configure_formatter(self, config): - """Configure a formatter from a dictionary.""" - if '()' in config: - factory = config['()'] # for use in exception handler - try: - result = self.configure_custom(config) - except TypeError as te: - if "'format'" not in str(te): - raise - #Name of parameter changed from fmt to format. - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. 
by Django) - config['fmt'] = config.pop('format') - config['()'] = factory - result = self.configure_custom(config) - else: - fmt = config.get('format', None) - dfmt = config.get('datefmt', None) - result = logging.Formatter(fmt, dfmt) - return result - - def configure_filter(self, config): - """Configure a filter from a dictionary.""" - if '()' in config: - result = self.configure_custom(config) - else: - name = config.get('name', '') - result = logging.Filter(name) - return result - - def add_filters(self, filterer, filters): - """Add filters to a filterer from a list of names.""" - for f in filters: - try: - filterer.addFilter(self.config['filters'][f]) - except StandardError as e: - raise ValueError('Unable to add filter %r: %s' % (f, e)) - - def configure_handler(self, config): - """Configure a handler from a dictionary.""" - formatter = config.pop('formatter', None) - if formatter: - try: - formatter = self.config['formatters'][formatter] - except StandardError as e: - raise ValueError('Unable to set formatter ' - '%r: %s' % (formatter, e)) - level = config.pop('level', None) - filters = config.pop('filters', None) - if '()' in config: - c = config.pop('()') - if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType: - c = self.resolve(c) - factory = c - else: - klass = self.resolve(config.pop('class')) - #Special case for handler which refers to another handler - if issubclass(klass, logging.handlers.MemoryHandler) and\ - 'target' in config: - try: - config['target'] = self.config['handlers'][config['target']] - except StandardError as e: - raise ValueError('Unable to set target handler ' - '%r: %s' % (config['target'], e)) - elif issubclass(klass, logging.handlers.SMTPHandler) and\ - 'mailhost' in config: - config['mailhost'] = self.as_tuple(config['mailhost']) - elif issubclass(klass, logging.handlers.SysLogHandler) and\ - 'address' in config: - config['address'] = self.as_tuple(config['address']) - factory = klass - 
kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) - try: - result = factory(**kwargs) - except TypeError as te: - if "'stream'" not in str(te): - raise - #The argument name changed from strm to stream - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. by Django) - kwargs['strm'] = kwargs.pop('stream') - result = factory(**kwargs) - if formatter: - result.setFormatter(formatter) - if level is not None: - result.setLevel(_checkLevel(level)) - if filters: - self.add_filters(result, filters) - return result - - def add_handlers(self, logger, handlers): - """Add handlers to a logger from a list of names.""" - for h in handlers: - try: - logger.addHandler(self.config['handlers'][h]) - except StandardError as e: - raise ValueError('Unable to add handler %r: %s' % (h, e)) - - def common_logger_config(self, logger, config, incremental=False): - """ - Perform configuration which is common to root and non-root loggers. - """ - level = config.get('level', None) - if level is not None: - logger.setLevel(_checkLevel(level)) - if not incremental: - #Remove any existing handlers - for h in logger.handlers[:]: - logger.removeHandler(h) - handlers = config.get('handlers', None) - if handlers: - self.add_handlers(logger, handlers) - filters = config.get('filters', None) - if filters: - self.add_filters(logger, filters) - - def configure_logger(self, name, config, incremental=False): - """Configure a non-root logger from a dictionary.""" - logger = logging.getLogger(name) - self.common_logger_config(logger, config, incremental) - propagate = config.get('propagate', None) - if propagate is not None: - logger.propagate = propagate - - def configure_root(self, config, incremental=False): - """Configure a root logger from a dictionary.""" - root = logging.getLogger() - self.common_logger_config(root, config, incremental) - -dictConfigClass = DictConfigurator - -def dictConfig(config): - """Configure logging using a 
dictionary.""" - dictConfigClass(config).configure() diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats.py deleted file mode 100755 index 99449c8a900a3f8ad53c6c12fbbc4d2197b1cb45..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats.py +++ /dev/null @@ -1,468 +0,0 @@ -#!/usr/bin/env python - -if __name__ == '__main__': - import adddeps - -import argparse -import csv -import hashlib -import itertools -import logging -import math -import os -import sqlalchemy.orm.exc -import subprocess -import sys - -from collections import defaultdict -from fn import _ -from fn import Stream -from fn.iters import repeat -from pprint import pprint - -import opentuner -from opentuner import resultsdb -from opentuner.resultsdb.models import * - -log = logging.getLogger('opentuner.utils.stats') - -argparser = argparse.ArgumentParser() -argparser.add_argument('--label') -argparser.add_argument('--stats', action='store_true', - help="run in stats mode") -argparser.add_argument('--by-request-count', action='store_true', - help='report stats by request count') -argparser.add_argument('--stats-quanta', type=float, default=10, - help="step size in seconds for binning with --stats") -argparser.add_argument('--stats-dir', default='stats', - help="directory to output --stats to") -argparser.add_argument('--stats-input', default="opentuner.db") -argparser.add_argument('--min-runs', type=int, default=1, - help="ignore series with less then N runs") - -PCTSTEPS = map(_/20.0, xrange(21)) - -def mean(vals): - n = 0.0 - d = 0.0 - for v in vals: - if v is not None: - n += v - d += 1.0 - if d == 0.0: - return None - return n/d - -def median(vals): - vals = sorted(vals) - a = (len(vals)-1)/2 - b = (len(vals))/2 - return (vals[a]+vals[b])/2.0 - -def percentile(vals, pct): - vals = sorted(vals) - pos = (len(vals)-1) * pct - a = 
int(math.floor(pos)) - b = min(len(vals) - 1, a + 1) - return (1.0-(pos-a))*vals[a] + (pos-a)*vals[b] - -def variance(vals): - vals = filter(lambda x: x is not None, vals) - avg = mean(vals) - if avg is None: - return None - if avg in (float('inf'), float('-inf')): - return avg - return mean(map((_ - avg) ** 2, vals)) - -def stddev(vals): - var = variance(vals) - if var is None: - return None - return math.sqrt(var) - -def hash_args(x): - d = dict(vars(x)) - for k in ('database', 'results_log', 'results_log_details'): - d[k] = None - return hashlib.sha256(str(sorted(d.items()))).hexdigest()[:20] - -def run_label(tr, short = False): - techniques = ','.join(tr.args.technique) - if not tr.name or tr.name=='unnamed': - if short: - return techniques - else: - return "%s_%s" % (techniques, hash_args(tr.args)[:6]) - else: - return tr.name - -def run_dir(base, tr): - return os.path.join(base, - tr.program.project, - tr.program.name.split('/')[-1], - tr.program_version.version[:16]) - -class StatsMain(object): - def __init__(self, args): - self.args = args - path = args.stats_input - self.dbs = list() - for f in os.listdir(path): - if 'journal' in f: - continue - try: - e, sm = resultsdb.connect('sqlite:///'+os.path.join(path, f)) - self.dbs.append(sm()) - except: - log.error('failed to load database: %s', - os.path.join(path, f), - exc_info=True) - - def main(self): - dir_label_runs = defaultdict(lambda: defaultdict(list)) - for session in self.dbs: - q = (session.query(resultsdb.models.TuningRun) - .filter_by(state='COMPLETE') - .order_by('name')) - - if self.args.label: - q = q.filter(TuningRun.name.in_( - map(str.strip,self.args.label.split(',')))) - - for tr in q: - d = run_dir(self.args.stats_dir, tr) - d = os.path.normpath(d) - dir_label_runs[d][run_label(tr)].append((tr, session)) - - summary_report = defaultdict(lambda: defaultdict(list)) - for d, label_runs in dir_label_runs.iteritems(): - if not os.path.isdir(d): - os.makedirs(d) - session = 
label_runs.values()[0][0][1] - objective = label_runs.values()[0][0][0].objective - all_run_ids = map(_[0].id, itertools.chain(*label_runs.values())) - q = (session.query(Result) - .filter(Result.tuning_run_id.in_(all_run_ids)) - .filter(Result.time < float('inf')) - .filter_by(was_new_best=True, state='OK')) - total = q.count() - if total == 0: - continue - q = objective.filter_acceptable(q) - acceptable = q.count() - q = q.order_by(*objective.result_order_by_terms()) - best = q.limit(1).one() - worst = q.offset(acceptable-1).limit(1).one() - - map(len, label_runs.values()) - - log.info("%s -- best %.4f / worst %.f4 " - "-- %d of %d acceptable -- %d techniques with %d to %d runs", - d, - best.time, - worst.time, - acceptable, - total, - len(label_runs.values()), - min(map(len, label_runs.values())), - max(map(len, label_runs.values()))) - - for label, runs in sorted(label_runs.items()): - if len(runs) < self.args.min_runs: - print len(runs) ,self.args.min_runs - continue - log.debug('%s/%s has %d runs %s',d, label, len(runs), runs[0][0].args.technique) - self.combined_stats_over_time(d, label, runs, objective, worst, best) - - final_scores = list() - for run, session in runs: - try: - final = (session.query(Result) - .filter_by(tuning_run=run, - configuration=run.final_config) - .limit(1) - .one()) - except sqlalchemy.orm.exc.NoResultFound: - continue - final_scores.append(objective.stats_quality_score(final, worst, best)) - final_scores.sort() - if final_scores: - norm = objective.stats_quality_score(best, worst, best) - if norm > 0.00001: - summary_report[d][run_label(run, short=True)] = ( - percentile(final_scores, 0.5) / norm, - percentile(final_scores, 0.1) / norm, - percentile(final_scores, 0.9) / norm, - ) - else: - summary_report[d][run_label(run, short=True)] = ( - percentile(final_scores, 0.5) + norm + 1.0, - percentile(final_scores, 0.1) + norm + 1.0, - percentile(final_scores, 0.9) + norm + 1.0, - ) - - - with open(self.args.stats_dir+ "/summary.dat", 
'w') as o: - # make summary report - keys = sorted(reduce(set.union, - [set(x.keys()) for x in summary_report.values()], - set())) - print >>o, '#####', - for k in keys: - print >>o, k, - print >>o - for d, label_vals in sorted(summary_report.items()): - print >>o, d.split('/')[-2], - for k in keys: - if k in label_vals: - print >>o, '-', label_vals[k][0], label_vals[k][1], label_vals[k][2], - else: - print >>o, '-', '-', '-', '-', - print >>o - - if keys: - plotcmd = ["""1 w lines lt 1 lc rgb "black" notitle""", - """'summary.dat' using 3:4:5:xtic(1) ti "%s" """ % keys[0]] - for n, k in enumerate(keys[1:]): - plotcmd.append("""'' using %d:%d:%d ti "%s" """ % ( - 4*n + 7, - 4*n + 8, - 4*n + 9, - k)) - self.gnuplot_summary_file(self.args.stats_dir, 'summary', plotcmd) - - - - for d, label_runs in dir_label_runs.iteritems(): - labels = [k for k,v in label_runs.iteritems() - if len(v)>=self.args.min_runs] - self.gnuplot_file(d, - "medianperfe", - ['"%s_percentiles.dat" using 1:12:4:18 with errorbars title "%s"' % (l,l) for l in labels]) - self.gnuplot_file(d, - "meanperfe", - ['"%s_percentiles.dat" using 1:21:4:18 with errorbars title "%s"' % (l,l) for l in labels]) - self.gnuplot_file(d, - "medianperfl", - ['"%s_percentiles.dat" using 1:12 with lines title "%s"' % (l,l) for l in labels]) - self.gnuplot_file(d, - "meanperfl", - ['"%s_percentiles.dat" using 1:21 with lines title "%s"' % (l,l) for l in labels]) - - # print - # print "10% Scores", d - # pprint(self.technique_scores(d, labels, '0.1')) - # print - # print "90% Scores", d - # pprint(self.technique_scores(d, labels, '0.9')) - # print - # print "Mean Scores", d - # pprint(self.technique_scores(d, labels, 'mean')) - print - print "Median Scores", d - pprint(self.technique_scores(d, labels, '0.5')) - - - def technique_scores(self, directory, labels, ykey, xkey='#sec', factor=10.0): - max_duration = None - min_value = float('inf') - for label in labels: - try: - dr = 
csv.DictReader(open(os.path.join(directory,label+"_percentiles.dat")), delimiter=' ', lineterminator='\n') - lastrow = list(dr)[-1] - max_duration = max(max_duration, float(lastrow[xkey])) - min_value = min(min_value, float(lastrow[ykey])) - except: - log.exception("failed computing score") - - scores = list() - - for label in labels: - try: - dr = csv.DictReader(open(os.path.join(directory,label+"_percentiles.dat")), delimiter=' ', lineterminator='\n') - score = 0.0 - lastsec = 0.0 - value = float('inf') - for row in dr: - duration = float(row[xkey]) - lastsec - lastsec = float(row[xkey]) - value = float(row[ykey]) - score += duration * (value - min_value) - score += (factor*max_duration - lastsec) * (value - min_value) - scores.append((score, label)) - except: - log.exception("failed computing score") - - return sorted(scores) - - - def combined_stats_over_time(self, - output_dir, - label, - runs, - objective, - worst, - best, - ): - """ - combine stats_over_time() vectors for multiple runs - """ - - #extract_fn = lambda dr: objective.stats_quality_score(dr.result, worst, best) - extract_fn = _.result.time - combine_fn = min - no_data = 999 - - log.debug("writing stats for %s to %s", label, output_dir) - by_run = [self.stats_over_time(session, run, extract_fn, combine_fn, no_data) - for run, session in runs] - max_len = max(map(len, by_run)) - - by_run_streams = [Stream() << x << repeat(x[-1], max_len-len(x)) - for x in by_run] - by_quanta = zip(*by_run_streams[:]) - - def data_file(suffix, headers, value_function): - with open(os.path.join(output_dir, label+suffix), 'w') as fd: - out = csv.writer(fd, delimiter=' ', lineterminator='\n') - out.writerow(['#sec'] + headers) - for quanta, values in enumerate(by_quanta): - sec = quanta*self.args.stats_quanta - out.writerow([sec] + value_function(values)) - - #data_file('_details.dat', - # map(lambda x: 'run%d'%x, xrange(max_len)), - # list) - #self.gnuplot_file(output_dir, - # label+'_details', - # 
[('"'+label+'_details.dat"' - # ' using 1:%d'%i + - # ' with lines' - # ' title "Run %d"'%i) - # for i in xrange(max_len)]) - - data_file('_mean.dat', - ['#sec', 'mean', 'stddev'], - lambda values: [mean(values), stddev(values)]) - self.gnuplot_file(output_dir, - label+'_mean', - ['"'+label+'_mean.dat" using 1:2 with lines title "Mean"']) - - def extract_percentiles(values): - values = sorted(values) - return ([values[int(round(p*(len(values)-1)))] for p in PCTSTEPS] - + [mean(values)]) - data_file("_percentiles.dat", PCTSTEPS + ['mean'], extract_percentiles) - self.gnuplot_file(output_dir, - label+'_percentiles', - reversed([ - '"'+label+'_percentiles.dat" using 1:2 with lines title "0%"', - # '"" using 1:3 with lines title "5%"', - '"" using 1:4 with lines title "10%"', - # '"" using 1:5 with lines title "25%"', - '"" using 1:6 with lines title "20%"', - # '"" using 1:7 with lines title "35%"', - '"" using 1:8 with lines title "30%"', - # '"" using 1:9 with lines title "45%"', - '"" using 1:10 with lines title "40%"', - # '"" using 1:11 with lines title "55%"', - '"" using 1:12 with lines title "50%"', - # '"" using 1:13 with lines title "65%"', - '"" using 1:14 with lines title "70%"', - # '"" using 1:15 with lines title "75%"', - '"" using 1:16 with lines title "80%"', - # '"" using 1:17 with lines title "85%"', - '"" using 1:18 with lines title "90%"', - # '"" using 1:19 with lines title "95%"', - '"'+label+'_percentiles.dat" using 1:20 with lines title "100%"', - ])) - - def gnuplot_file(self, output_dir, prefix, plotcmd): - with open(os.path.join(output_dir, prefix+'.gnuplot'), 'w') as fd: - print >>fd, 'set terminal postscript eps enhanced color' - print >>fd, 'set output "%s"' % (prefix+'.eps') - print >>fd, 'set ylabel "Execution Time (seconds)"' - print >>fd, 'set xlabel "Autotuning Time (seconds)"' - print >>fd, 'plot', ',\\\n'.join(plotcmd) - - try: - subprocess.call(['gnuplot', prefix+'.gnuplot'], cwd=output_dir, stdin=None) - except OSError: - 
log.error("command gnuplot not found") - - def gnuplot_summary_file(self, output_dir, prefix, plotcmd): - with open(os.path.join(output_dir, prefix+'.gnuplot'), 'w') as fd: - print >>fd, 'set terminal postscript eps enhanced color' - print >>fd, 'set output "%s"' % (prefix+'.eps') - print >>fd, ''' -set boxwidth 0.9 -set style fill solid 1.00 border 0 -set style histogram errorbars gap 2 lw 1 -set style data histograms -set xtics rotate by -45 -set bars 0.5 -set yrange [0:20] - -set yrange [0:10] -set key out vert top left -set size 1.5,1 -set ytics 1 - -''' - print >>fd, 'plot', ',\\\n'.join(plotcmd) - subprocess.call(['gnuplot', prefix+'.gnuplot'], cwd=output_dir, stdin=None) - - - def stats_over_time(self, - session, - run, - extract_fn, - combine_fn, - no_data = None): - """ - return reduce(combine_fn, map(extract_fn, data)) for each quanta of the - tuning run - """ - value_by_quanta = [ no_data ] - start_date = run.start_date - - subq = (session.query(Result.id) - .filter_by(tuning_run = run, was_new_best = True, state='OK')) - - q = (session.query(DesiredResult) - .join(Result) - .filter(DesiredResult.state=='COMPLETE', - DesiredResult.tuning_run == run, - DesiredResult.result_id.in_(subq.subquery())) - .order_by(DesiredResult.request_date)) - - first_id = None - for dr in q: - if first_id is None: - first_id = dr.id - td = (dr.request_date - start_date) - duration = td.seconds + (td.days * 24 * 3600.0) - if self.args.by_request_count: - quanta = dr.id - first_id - else: - quanta = int(duration / self.args.stats_quanta) - while len(value_by_quanta) <= quanta: - value_by_quanta.append(value_by_quanta[-1]) - - if value_by_quanta[-1] is no_data: - value_by_quanta[-1] = extract_fn(dr) - else: - value_by_quanta[-1] = combine_fn(value_by_quanta[-1], extract_fn(dr)) - - return value_by_quanta - - - - - -if __name__ == '__main__': - opentuner.tuningrunmain.init_logging() - sys.exit(StatsMain(argparser.parse_args()).main()) - - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats_matplotlib.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats_matplotlib.py deleted file mode 100644 index 54e9132a662fa68089ce3d0ba00cb6502bd2c712..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/opentuner/utils/stats_matplotlib.py +++ /dev/null @@ -1,290 +0,0 @@ -#!usr/bin/python - -if __name__ == '__main__': - import adddeps - -import itertools -import math -import matplotlib.pyplot as plt -import numpy -import os -import sqlalchemy -import sqlalchemy.orm.exc - -from collections import defaultdict -from fn import _ -from fn import Stream -from fn.iters import repeat -from opentuner import resultsdb - -PCTSTEPS = map(_/20.0, xrange(21)) - - -def mean(vals): - """ - Arguments, - vals: List of floating point numbers - Returns, - The mean of the numbers in the input list - None if all values in the list are None - """ - filtered_values = [float(x) for x in vals if x is not None] - if len(filtered_values) == 0: - return None - return numpy.mean(numpy.array(filtered_values)) - - -def stddev(vals): - """ - Arguments, - vals: List of floating point numbers - Returns, - The standard deviation of numbers in the input list - None if all values in the list are None - """ - filtered_values = [float(x) for x in vals if x is not None] - if len(filtered_values) == 0: - return None - return math.sqrt(numpy.var(numpy.array(filtered_values))) - - -def get_dbs(path, db_type='sqlite:///'): - """ - Arguments, - path: Path of directory containing .db files - Returns, - A list of (engine, session) pairs to the dbs pointed to by - the db files - """ - dbs = list() - for f in os.listdir(path): - if 'journal' in f: - continue - try: - db_path = os.path.join(path, f) - e, sm = resultsdb.connect(db_type + db_path) - dbs.append(sm()) - except Exception as e: - print e - print "Error encountered while connecting to db" - return dbs - - -def 
matplotlibplot_file(labels, xlim = None, ylim = None, disp_types=['median']): - """ - Arguments, - labels: List of labels that need to be included in the plot - xlim: Integer denoting the maximum X-coordinate in the plot - ylim: Integer denoting the maximum Y-coordinate in the plot - disp_types: List of measures that are to be displayed in the plot - Returns, - A figure object representing the required plot - """ - - figure = plt.figure() - values = get_values(labels) - for label in values: - (mean_values, percentile_values) = values[label] - for disp_type in disp_types: - cols = None - data = percentile_values - - if disp_type == 'median': - cols = [11] - elif disp_type == 'mean': - cols = [1] - data = mean_values - elif disp_type == 'all_percentiles': - cols = range(1,22) - - plotted_data = [[] for x in xrange(len(cols))] - - x_indices = [] - for data_point in data[1:]: - x_indices.append(int(data_point[0])) - for i in range(0, len(cols)): - plotted_data[i].append(float(data_point[cols[i]])) - args = [] - for to_plot in plotted_data: - args.append(x_indices) - args.append(to_plot) - - plt.plot(*args, label='%s(%s)' % (label, disp_type)) - - if xlim is not None: - plt.xlim(xlim) - if ylim is not None: - plt.ylim(ylim) - - plt.xlabel('Autotuning Time (seconds)') - plt.ylabel('Execution Time (seconds)') - plt.legend(loc='upper right') - return figure - - -def run_label(tr): - techniques = ','.join(tr.args.technique) - if not tr.name or tr.name == 'unnamed': - return techniques - return tr.name - - -def combined_stats_over_time(label, - runs, - objective, - worst, - best, - ): - """ - combine stats_over_time() vectors for multiple runs - """ - - extract_fn = _.result.time - combine_fn = min - no_data = 999 - - by_run = [stats_over_time(session, run, extract_fn, combine_fn, no_data) - for run, session in runs] - max_len = max(map(len, by_run)) - - by_run_streams = [Stream() << x << repeat(x[-1], max_len-len(x)) - for x in by_run] - by_quanta = zip(*by_run_streams[:]) 
- - # TODO: Fix this, this variable should be configurable - stats_quanta = 10 - def get_data(value_function): - final_values = [] - for quanta, values in enumerate(by_quanta): - sec = quanta*stats_quanta - final_values.append([sec] + value_function(values)) - return final_values - - mean_values = get_data(lambda values: [mean(values), stddev(values)]) - - def extract_percentiles(values): - values = sorted(values) - return ([values[int(round(p*(len(values)-1)))] for p in PCTSTEPS] - + [mean(values)]) - percentile_values = get_data(extract_percentiles) - return mean_values, percentile_values - - -def stats_over_time(session, - run, - extract_fn, - combine_fn, - no_data = None): - """ - return reduce(combine_fn, map(extract_fn, data)) for each quanta of the - tuning run - """ - value_by_quanta = [ no_data ] - start_date = run.start_date - - subq = (session.query(resultsdb.models.Result.id) - .filter_by(tuning_run = run, was_new_best = True, state='OK')) - - q = (session.query(resultsdb.models.DesiredResult) - .join(resultsdb.models.Result) - .filter(resultsdb.models.DesiredResult.state=='COMPLETE', - resultsdb.models.DesiredResult.tuning_run == run, - resultsdb.models.DesiredResult.result_id.in_(subq.subquery())) - .order_by(resultsdb.models.DesiredResult.request_date)) - - first_id = None - for dr in q: - if first_id is None: - first_id = dr.id - td = (dr.request_date - start_date) - duration = td.seconds + (td.days * 24 * 3600.0) - # TODO: Make this variable configurable - by_request_count = True - stats_quanta = 10 - if by_request_count: - quanta = dr.id - first_id - else: - quanta = int(duration / stats_quanta) - while len(value_by_quanta) <= quanta: - value_by_quanta.append(value_by_quanta[-1]) - - if value_by_quanta[-1] is no_data: - value_by_quanta[-1] = extract_fn(dr) - else: - value_by_quanta[-1] = combine_fn(value_by_quanta[-1], extract_fn(dr)) - - return value_by_quanta - - -def get_all_labels(): - """ - Returns, - List of labels that are in the complete 
state - """ - dbs = get_dbs(os.getcwd()) - all_labels = list() - for db in dbs: - all_labels.extend(db.query(resultsdb.models.TuningRun.name) - .filter_by(state='COMPLETE') - .distinct() - .all()) - all_labels = [str(element[0]) for element in all_labels] - return all_labels - - -def get_values(labels): - """ - Arguments, - labels: List of labels whose values are of interest - Returns, - A list of (mean, percentile) tuples, corresponding to the - provided list of labels - """ - dbs = get_dbs(os.getcwd()) - dir_label_runs = defaultdict(lambda: defaultdict(list)) - for db in dbs: - q = (db.query(resultsdb.models.TuningRun) - .filter_by(state='COMPLETE') - .order_by('name')) - if labels: - q = q.filter(resultsdb.models.TuningRun.name.in_(labels)) - for tr in q: - dir_label_runs[run_label(tr)][run_label(tr)].append((tr, db)) - all_run_ids = list() - returned_values = {} - for d, label_runs in dir_label_runs.iteritems(): - all_run_ids = map(_[0].id, itertools.chain(*label_runs.values())) - session = label_runs.values()[0][0][1] - objective = label_runs.values()[0][0][0].objective - - q = (session.query(resultsdb.models.Result) - .filter(resultsdb.models.Result.tuning_run_id.in_(all_run_ids)) - .filter(resultsdb.models.Result.time < float('inf')) - .filter_by(was_new_best=True, state='OK')) - total = q.count() - q = objective.filter_acceptable(q) - acceptable = q.count() - q = q.order_by(*objective.result_order_by_terms()) - best = q.limit(1).one() - worst = q.offset(acceptable - 1).limit(1).one() - - for label, runs in sorted(label_runs.items()): - (mean_values, percentile_values) = combined_stats_over_time(label, runs, objective, worst, best) - returned_values[label] = (mean_values, percentile_values) - final_scores = list() - for run, session in runs: - try: - final = (session.query(resultsdb.models.Result) - .filter_by(tuning_run = run, - configuration = run.final_config) - .limit(1).one()) - except sqlalchemy.orm.exc.NoResultFound: - continue - 
final_scores.append(objective.stats_quality_score(final, worst, best)) - final_scores.sort() - return returned_values - -if __name__ == '__main__': - labels = [u'timeouts', u'always_reorder', u'add_store_at', u'all_options'] - get_values(labels) - print get_all_labels() diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/optional-requirements.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/optional-requirements.txt deleted file mode 100644 index 9848f674cb6e5ca1faba757abd98eb5066e4688d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/optional-requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -django==1.6.1 -matplotlib==1.1.1 -virtualenv==1.9.1 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/requirements.txt b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/requirements.txt deleted file mode 100644 index fa9cfeca2ede04002798fea0db669de3c87879d4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -argparse>=1.2.1 -fn>=0.2.12 -numpy>=1.8.0 -pysqlite>=2.6.3 -SQLAlchemy>=0.8.2 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/setup.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/setup.py deleted file mode 100755 index 633d4359d9e9655b5241521208fecc37bc4ab65f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/setup.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python -try: - from setuptools import setup -except ImportError: - try: - from setuptools.core import setup - except ImportError: - from distutils.core import setup - -try: - from pypandoc import convert - read_md = lambda f: convert(f, 'rest') -except ImportError: - print("warning: pypandoc module not found, could not convert Markdown to RST") - read_md = lambda f: open(f, 'r').read() - -required = open('requirements.txt').read().splitlines() -required = [l.strip() for l in required - 
if l.strip() and not l.strip().startswith('#')] - -setup( - name='opentuner', - version='0.8.0', - url='http://opentuner.org/', - license='MIT', - author='Jason Ansel', - author_email='jansel@jansel.net', - description='An extensible framework for program autotuning', - long_description=read_md('README.md'), - packages=['opentuner', 'opentuner.resultsdb', 'opentuner.utils', - 'opentuner.measurement', 'opentuner.search'], - install_requires=required, -) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/manage.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/manage.py deleted file mode 100644 index f27b5b8db13b490f7599856364f59c6fedcbfe6e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/manage.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stats_app.settings") - - from django.core.management import execute_from_command_line - - execute_from_command_line(sys.argv) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/settings.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/settings.py deleted file mode 100644 index 09505be03e5621e4df952e878b52973da9588ffc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/settings.py +++ /dev/null @@ -1,162 +0,0 @@ -# Django settings for stats_app project. 
-import os - -DEBUG = True -TEMPLATE_DEBUG = DEBUG - -ADMINS = ( - # ('Your Name', 'your_email@example.com'), -) - -MANAGERS = ADMINS -DIRECTORY_NAME = os.path.dirname(os.path.realpath(__file__)) - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'. - 'NAME': DIRECTORY_NAME + '/db', # Or path to database file if using sqlite3. - # The following settings are not used with sqlite3: - 'USER': '', - 'PASSWORD': '', - 'HOST': '', # Empty for localhost through domain sockets or '127.0.0.1' for localhost through TCP. - 'PORT': '', # Set to empty string for default. - } -} - -# Hosts/domain names that are valid for this site; required if DEBUG is False -# See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts -ALLOWED_HOSTS = [] - -# Local time zone for this installation. Choices can be found here: -# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name -# although not all choices may be available on all operating systems. -# In a Windows environment this must be set to your system time zone. -TIME_ZONE = 'America/Chicago' - -# Language code for this installation. All choices can be found here: -# http://www.i18nguy.com/unicode/language-identifiers.html -LANGUAGE_CODE = 'en-us' - -SITE_ID = 1 - -# If you set this to False, Django will make some optimizations so as not -# to load the internationalization machinery. -USE_I18N = True - -# If you set this to False, Django will not format dates, numbers and -# calendars according to the current locale. -USE_L10N = True - -# If you set this to False, Django will not use timezone-aware datetimes. -USE_TZ = True - -# Absolute filesystem path to the directory that will hold user-uploaded files. -# Example: "/var/www/example.com/media/" -MEDIA_ROOT = '' - -# URL that handles the media served from MEDIA_ROOT. Make sure to use a -# trailing slash. 
-# Examples: "http://example.com/media/", "http://media.example.com/" -MEDIA_URL = '' - -# Absolute path to the directory static files should be collected to. -# Don't put anything in this directory yourself; store your static files -# in apps' "static/" subdirectories and in STATICFILES_DIRS. -# Example: "/var/www/example.com/static/" -STATIC_ROOT = '' - -# URL prefix for static files. -# Example: "http://example.com/static/", "http://static.example.com/" -STATIC_URL = '/static/' - -# Additional locations of static files -STATICFILES_DIRS = ( - # Put strings here, like "/home/html/static" or "C:/www/django/static". - # Always use forward slashes, even on Windows. - # Don't forget to use absolute paths, not relative paths. - DIRECTORY_NAME + '/static', -) - -# List of finder classes that know how to find static files in -# various locations. -STATICFILES_FINDERS = ( - 'django.contrib.staticfiles.finders.FileSystemFinder', - 'django.contrib.staticfiles.finders.AppDirectoriesFinder', -# 'django.contrib.staticfiles.finders.DefaultStorageFinder', -) - -# Make this unique, and don't share it with anybody. -SECRET_KEY = 't!!j*1gt0(5n%6nj-lirzja-9uj6s86s#0@kp2@8v&x#+c2+c-' - -# List of callables that know how to import templates from various sources. -TEMPLATE_LOADERS = ( - 'django.template.loaders.filesystem.Loader', - 'django.template.loaders.app_directories.Loader', -# 'django.template.loaders.eggs.Loader', -) - -MIDDLEWARE_CLASSES = ( - 'django.middleware.common.CommonMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - # Uncomment the next line for simple clickjacking protection: - # 'django.middleware.clickjacking.XFrameOptionsMiddleware', -) - -ROOT_URLCONF = 'stats_app.urls' - -# Python dotted path to the WSGI application used by Django's runserver. 
-WSGI_APPLICATION = 'stats_app.wsgi.application' - -TEMPLATE_DIRS = ( - # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". - # Always use forward slashes, even on Windows. - # Don't forget to use absolute paths, not relative paths. - DIRECTORY_NAME + '/templates', -) - -INSTALLED_APPS = ( - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.sites', - 'django.contrib.messages', - 'django.contrib.staticfiles', - # Uncomment the next line to enable the admin: - 'django.contrib.admin', - # Uncomment the next line to enable admin documentation: - 'django.contrib.admindocs', -) - -SESSION_SERIALIZER = 'django.contrib.sessions.serializers.JSONSerializer' - -# A sample logging configuration. The only tangible logging -# performed by this configuration is to send an email to -# the site admins on every HTTP 500 error when DEBUG=False. -# See http://docs.djangoproject.com/en/dev/topics/logging for -# more details on how to customize your logging configuration. 
-LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'filters': { - 'require_debug_false': { - '()': 'django.utils.log.RequireDebugFalse' - } - }, - 'handlers': { - 'mail_admins': { - 'level': 'ERROR', - 'filters': ['require_debug_false'], - 'class': 'django.utils.log.AdminEmailHandler' - } - }, - 'loggers': { - 'django.request': { - 'handlers': ['mail_admins'], - 'level': 'ERROR', - 'propagate': True, - }, - } -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/static/charts.css b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/static/charts.css deleted file mode 100644 index e32e2832aeac39540f2fb2c39e3817b6ab85cf3b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/static/charts.css +++ /dev/null @@ -1,11 +0,0 @@ -img.center { -display: block; -margin-left: auto; -margin-right: auto; - -padding: 8px; -border: solid; -border-color: #dddddd #aaaaaa #aaaaaa #dddddd; -border-width: 1px 2px 2px 1px; -background-color: white; -} diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/templates/charts.html b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/templates/charts.html deleted file mode 100644 index d38bb4c0c6c7c31d9cfce0bbc74d57f7601b3c83..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/templates/charts.html +++ /dev/null @@ -1,41 +0,0 @@ -<!DOCTYPE html> -<head> -<link rel="stylesheet" type="text/css" href="{{ STATIC_URL }}charts.css" media="screen" /> -<html lang="en"> - <title>{% block title %}Graph{% endblock %}</title> -</head> - -<body> - <div id="graphForm"> - <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"> </script> - <script> - function callback() {{ - var values = $('#graphForm form').serialize(); - $('#graphForm img').attr("src", "graph.png?" 
+ values); - }} - </script> - <p style="text-align:center"> - <img src="graph.png" id="graph" /> - </p> - <form method = "GET" action="" style="text-align:center"> - <h3>X Limits:</h3> - <input type="range" name="xlim" min="0" max="10000"> - <br><h3>Y Limits:</h3> - <input type="range" name="ylim" min="0" max="20"> - <br> - <h3>Labels:</h3> - {0} - <br> - <h3>Measure:</h3> - <b>Mean:</b> - <input type="checkbox" name="disp_type" value="mean"> - <b>Median:</b> - <input type="checkbox" name="disp_type" value="median"> - <b>All percentiles:</b> - <input type="checkbox" name="disp_type" value="all_percentiles"> - <br> - <input type="button" value="Graph!" onclick="callback()"> - </form> - </div> -</body> -</html> diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/urls.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/urls.py deleted file mode 100644 index 15743290ec27ae4f2a4e633e80483d972190c870..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/urls.py +++ /dev/null @@ -1,20 +0,0 @@ -from django.conf.urls import patterns, include, url - -# Uncomment the next two lines to enable the admin: -from django.contrib import admin -import views.charts -admin.autodiscover() - -urlpatterns = patterns('', - # Examples: - # url(r'^$', 'stats_app.views.home', name='home'), - # url(r'^stats_app/', include('stats_app.foo.urls')), - - # Uncomment the admin/doc line below to enable admin documentation: - # url(r'^admin/doc/', include('django.contrib.admindocs.urls')), - - # Uncomment the next line to enable the admin: - url(r'^admin/', include(admin.site.urls)), - url(r'^graph.png$', views.charts.display_graph, name='graph'), - url(r'^$', views.charts.display_full_page, name='graph_page'), -) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views.py 
deleted file mode 100644 index 7cb32b3655aa032c745e97a088ad80365dd9c551..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views.py +++ /dev/null @@ -1,5 +0,0 @@ -from django.http import HttpResponse - - -def index(request): - return HttpResponse("Hello, world. You're at the stats application index.") diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views/__init__.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views/charts.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views/charts.py deleted file mode 100644 index c3a2ebff32281967f11640ef16a39353ca501d1a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/views/charts.py +++ /dev/null @@ -1,67 +0,0 @@ -import datetime -import django -from django.shortcuts import render -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -from matplotlib.dates import DateFormatter -from matplotlib.figure import Figure -import random - -from opentuner.utils import stats_matplotlib as stats - - -def display_graph(request): - """ - Handles request to display graph with provided parameters - """ - request_dict = dict(request.GET.iterlists()) - - xlim = request_dict.get('xlim', None) - if xlim: - xlim = int(xlim[0]) - else: - xlim = 5000 - xlim = [0, xlim] - - ylim = request_dict.get('ylim', None) - if ylim: - ylim = int(ylim[0]) - else: - ylim = 10 - ylim = [0, ylim] - - labels = request_dict.get('labels', None) - - disp_types = request_dict.get('disp_type', None) - if not disp_types: - disp_types = ['median'] - - fig = stats.matplotlibplot_file(labels, xlim=xlim, ylim=ylim, 
disp_types=disp_types) - canvas = FigureCanvas(fig) - response = django.http.HttpResponse(content_type='image/png') - canvas.print_png(response) - return response - - -def display_full_page(request): - """ - Handles request to display the full page - """ - all_labels = stats.get_all_labels() - label_list = get_label_list(all_labels) - html = render(request, 'charts.html') - content = html.content - content = content.format(label_list) - html.content = content - return html - - -def get_label_list(all_labels): - """ - Returns list of html form inputs corresponding to the different - labels in the provided db file - """ - label_list = '' - for label in all_labels: - label_list += '<b>%s</b>:<input type="checkbox" name="labels" value="%s">' % (label, label) - return label_list - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/wsgi.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/wsgi.py deleted file mode 100644 index 90f54d8e3dd53cadeeb3eafa33e1abd734485cd0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/stats_app/stats_app/wsgi.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -WSGI config for stats_app project. - -This module contains the WSGI application used by Django's development server -and any production WSGI deployments. It should expose a module-level variable -named ``application``. Django's ``runserver`` and ``runfcgi`` commands discover -this application via the ``WSGI_APPLICATION`` setting. - -Usually you will have the standard Django WSGI application here, but it also -might make sense to replace the whole Django WSGI application with a custom one -that later delegates to the Django one. For example, you could introduce WSGI -middleware here, or combine a Django application with an application of another -framework. - -""" -import os - -# We defer to a DJANGO_SETTINGS_MODULE already in the environment. 
This breaks -# if running multiple sites in the same mod_wsgi process. To fix this, use -# mod_wsgi daemon mode with each site in its own daemon process, or use -# os.environ["DJANGO_SETTINGS_MODULE"] = "stats_app.settings" -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stats_app.settings") - -# This application object is used by any WSGI server configured to use this -# file. This includes Django's development server, if the WSGI_APPLICATION -# setting points here. -from django.core.wsgi import get_wsgi_application -application = get_wsgi_application() - -# Apply WSGI middleware here. -# from helloworld.wsgi import HelloWorldApplication -# application = HelloWorldApplication(application) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_manipulator.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_manipulator.py deleted file mode 100644 index c6548510fdc40036978728a1c2ad06ff05a6b9e9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_manipulator.py +++ /dev/null @@ -1,270 +0,0 @@ -import unittest -import opentuner -import mock -import random -import numpy -from opentuner.search import manipulator - -def faked_random(nums): - f = fake_random(nums) - def inner(*args, **kwargs): - return f.next() - return inner - -def fake_random(nums): - i = 0 - while True: - yield nums[i] - i = (i+1) % len(nums) - - -class PermutationOperatorTests(unittest.TestCase): - - def setUp(self): - """ - Set up a few configurations. 
The values of the PermutationParameter are: - config1 - 0 1 2 3 4 5 6 7 8 9 - config2 - 4 3 2 1 0 9 8 7 6 5 - config3 - 1 0 4 2 7 9 5 3 6 8 - - """ - self.manipulator = manipulator.ConfigurationManipulator() - self.param1 = manipulator.PermutationParameter("param1", [0,1,2,3,4,5,6,7,8,9]) - self.manipulator.add_parameter(self.param1) - - self.cfg = self.manipulator.seed_config() - self.config1 = self.manipulator.seed_config() - self.config2 = self.manipulator.seed_config() - self.config3 = self.manipulator.seed_config() - - # repeating values - self.config4 = self.manipulator.seed_config() - self.config5 = self.manipulator.seed_config() - - - self.param1.set_value(self.config1, [0,1,2,3,4,5,6,7,8,9]) - self.param1.set_value(self.config2, [4,3,2,1,0,9,8,7,6,5]) - self.param1.set_value(self.config3, [1,0,4,2,7,9,5,3,6,8]) - - # repeating values - self.param1.set_value(self.config4, [1,2,3,4,2,3,4,3,4,4]) - self.param1.set_value(self.config5, [4,2,4,3,3,1,3,4,2,4]) - - @mock.patch('random.randint', side_effect=faked_random([1,6])) - def test_op2_random_swap_1_6(self, randint_func): - # operator shouuld swap the indices at 1 and 6 - self.param1.op2_random_swap(self.cfg, self.config1) - - self.assertEqual(self.param1.get_value(self.cfg),[0,6,2,3,4,5,1,7,8,9]) - self.assertEqual(self.param1.get_value(self.config1),[0,1,2,3,4,5,6,7,8,9]) - - - @mock.patch('random.randint', side_effect=faked_random([7,2])) - def test_op2_random_invert(self, randint_func): - #should reverse a section of length 3 starting at index given by randint - self.param1.op2_random_invert(self.cfg, self.config1) - self.assertEqual(self.param1.get_value(self.cfg),[0,1,2,3,4,5,6,9,8,7]) - - self.param1.op2_random_invert(self.cfg, self.config1) - self.assertEqual(self.param1.get_value(self.cfg),[0,1,4,3,2,5,6,7,8,9]) - - - @mock.patch('random.randint', side_effect=faked_random([0])) - def test_op3_cross_PMX_str5(self, randint_func): - # should perform PMX with a cut at 0 and crossover size 5 - 
self.param1.op3_cross(self.cfg, self.config1, self.config3, - xchoice='op3_cross_PMX', strength=0.5) - self.assertEqual(self.param1.get_value(self.cfg),[1,0,4,2,7,5,6,3,8,9]) - - @mock.patch('random.randint', side_effect=faked_random([5])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_CX_no_cross(self, uniform_func, randint_func): - # should perform no cross - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.8) - self.assertEqual(self.param1.get_value(self.config1),[0,1,2,3,4,5,6,7,8,9]) - - - @mock.patch('random.randint', side_effect=faked_random([5])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_CX_cross_p1(self, uniform_func, randint_func): - # should cross the first parent - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.3, c1=0.5, c2="unused") - self.assertEqual(self.param1.get_value(self.config1),[0,1,2,3,4,9,6,7,8,5]) - - @mock.patch('random.randint', side_effect=faked_random([5])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_CX_cross_p2(self, uniform_func, randint_func): - # should cross the second parent - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.3, c1=0.3, c2="unused") - self.assertEqual(self.param1.get_value(self.config1),[0,1,2,3,4,9,5,7,6,8]) - - - @mock.patch('random.randint', side_effect=faked_random([5])) - def test_op3_cross_PX_5(self, randint_func): - # Random cut point = 5 (index = 4) - self.param1.op3_cross_PX(self.cfg, self.config1, self.config3, 2) - self.assertEqual(self.param1.get_value(self.cfg),[1,0,4,2,3,5,6,7,8,9]) - - @mock.patch('random.randint', side_effect=faked_random([2])) - def test_op3_cross_PMX_0_d4(self, randint_func): - # cut = 2, d = 4 - self.param1.op3_cross_PMX(self.cfg, self.config2, self.config3, 4) - 
self.assertEqual(self.param1.get_value(self.cfg),[1,3,4,2,7,9,8,0,6,5]) - - - @mock.patch('random.randint', side_effect=faked_random([0])) - def test_op3_cross_PMX_0_d5(self, randint_func): - # cut = 0, d = 5 - self.param1.op3_cross_PMX(self.cfg, self.config1, self.config3, 5) - self.assertEqual(self.param1.get_value(self.cfg),[1,0,4,2,7,5,6,3,8,9]) - - @mock.patch('random.randint', side_effect=faked_random([4])) - def test_op3_cross_PMX_dups(self, randint_func): - # cut = 4, d = 5 - self.param1.op3_cross_PMX(self.cfg, self.config5, self.config4, 5) - - # [4,2,4,3,3,1,3,4,2,4] - # [1,2,3,4,2,3,4,3,4,4] - # expected: - # [1,2,4,3,2,3,4,3,4,4] - - self.assertEqual(self.param1.get_value(self.cfg), [1,2,4,3,2,3,4,3,4,4]) - - - @mock.patch('random.randint', side_effect=faked_random([5])) - def test_op3_cross_CX_5(self, randint_func): - # initial replacement at index 5 - self.param1.op3_cross_CX(self.cfg, self.config1, self.config2, "unused") - self.assertEqual(self.param1.get_value(self.cfg),[0,1,2,3,4,9,6,7,8,5]) - self.param1.op3_cross_CX(self.cfg, self.config1, self.config3, "unused") - self.assertEqual(self.param1.get_value(self.cfg),[0,1,2,3,4,9,5,7,6,8]) - - @mock.patch('random.randint', side_effect=faked_random([0])) - def test_op3_cross_CX_dups(self, randint_func): - # initial replacement at index 4 - self.param1.op3_cross_CX(self.cfg, self.config5, self.config4, "unused") - - # [4,2,4,3,3,1,3,4,2,4] - # [1,2,3,4,2,3,4,3,4,4] - # expected: - # [1,2,3,4,3,3,4,4,2,4] - - self.assertEqual(self.param1.get_value(self.cfg), [1,2,3,4,3,3,4,4,2,4]) - - - @mock.patch('random.randint', side_effect=faked_random([3])) - def test_op3_cross_OX1_3_d4(self, randint_func): - # cut at 3 - # d = 4 - self.param1.op3_cross_OX1(self.cfg, self.config1, self.config2, 4) - self.assertEqual(self.param1.get_value(self.cfg),[2,3,4,1,0,9,8,5,6,7]) - self.param1.op3_cross_OX1(self.cfg, self.config1, self.config3, 4) - self.assertEqual(self.param1.get_value(self.cfg),[0,1,3,2,7,9,5,4,6,8]) - 
- @mock.patch('random.randint', side_effect=faked_random([4,2])) - def test_op3_cross_OX3_2_5_d4(self, randint_func): - # cuts at 4,2 - # d = 4 - self.param1.op3_cross_OX3(self.cfg, self.config1, self.config2, 4) - self.assertEqual(self.param1.get_value(self.cfg),[3,4,5,6,2,1,0,9,7,8]) - self.param1.op3_cross_OX3(self.cfg, self.config1, self.config3, 4) - self.assertEqual(self.param1.get_value(self.cfg),[0,1,3,5,4,2,7,9,6,8]) - - -class FloatArrayOperatorTests(unittest.TestCase): - """ - also tests the operators for Array (since Array is abstract) - """ - - def setUp(self): - """ - Set up a few configurations. The values of the FloatArray are: - config1 - 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 - config2 - 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 - config3 - 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 - """ - self.manipulator = manipulator.ConfigurationManipulator() - self.param1 = manipulator.FloatArray("param1", 10, 4, 0) - self.manipulator.add_parameter(self.param1) - - self.cfg = self.manipulator.seed_config() - self.config1 = self.manipulator.seed_config() - self.config2 = self.manipulator.seed_config() - self.config3 = self.manipulator.seed_config() - - self.param1.set_value(self.config1, numpy.array([1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9])) - self.param1.set_value(self.config2, numpy.array([2.0,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9])) - self.param1.set_value(self.config3, numpy.array([3.0,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.8])) - - - @mock.patch('random.randint', side_effect=faked_random([3])) - def test_op3_cross_3_str4(self, randint_func): - self.param1.op3_cross(self.cfg, self.config1, self.config2, strength=0.4) - - val = self.param1.get_value(self.cfg) - expected = [1.0,1.1,1.2,2.3,2.4,2.5,2.6,1.7,1.8,1.9] - for i in range(len(val)): - self.assertAlmostEqual(val[i], expected[i]) - - @mock.patch('random.randint', side_effect=faked_random([3])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_no_cross(self, uniform_func, 
randint_func): - #should perform no cross - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.8) - val = self.param1.get_value(self.config1) - expected = [1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9] - for i in range(len(val)): - self.assertAlmostEqual(val[i], expected[i]) - - - @mock.patch('random.randint', side_effect=faked_random([3])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_cross_p1(self, uniform_func, randint_func): - #should cross the first parent - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.3, c1=0.5, c2="unused") - val = self.param1.get_value(self.config1) - expected = [1.0,1.1,1.2,2.3,2.4,2.5,1.6,1.7,1.8,1.9] - for i in range(len(val)): - self.assertAlmostEqual(val[i], expected[i]) - - - @mock.patch('random.randint', side_effect=faked_random([3])) - @mock.patch('random.uniform', side_effect=faked_random([0.4])) - def test_op3_swarm_cross_p2(self, uniform_func, randint_func): - #should cross the second parent - self.param1.op3_swarm(self.config1, self.config2, self.config3, - xchoice='op3_cross_CX', c=0.3, c1=0.3, c2="unused") - val = self.param1.get_value(self.config1) - expected = [1.0,1.1,1.2,3.3,3.4,3.5,1.6,1.7,1.8,1.9] - self.assertEqual(len(val),len(expected)) - for i in range(len(val)): - self.assertAlmostEqual(val[i], expected[i]) - - @mock.patch('random.random', side_effect=faked_random([0.2, 0.4])) - def test_op3_swarm_parallel(self, random_func): - # r1 = 0.2, r2 = 0.4, velocities = [-2,0,0,0,0,0,1,1.5,2,3] - # max and min are 4, 0 - velocities = numpy.array([-2.0,0.0,0,0,0,0,1.0,1.5,2,3.0]) - - vs = self.param1.op3_swarm_parallel(self.config1, self.config2, self.config3, velocities=velocities) - vs_expected = [-1.5,.5,.5,.5,.5,.5,1.5,2.0,2.5,3.48] - - self.assertEqual(len(vs),len(vs_expected)) - - for i in range(len(vs)): - self.assertAlmostEqual(vs[i], vs_expected[i]) - - - val = 
self.param1.get_value(self.config1) - expected = [0,1.6,1.7,1.8,1.9,2.0,3.1,3.7,4,4] - self.assertEqual(len(val),len(expected)) - for i in range(len(val)): - self.assertAlmostEqual(val[i], expected[i]) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_technique.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_technique.py deleted file mode 100644 index c6107bace942a5ac85533878131fb953439ea3f7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/tests/test_technique.py +++ /dev/null @@ -1,77 +0,0 @@ -import unittest -import opentuner -import mock -from opentuner.search.composableevolutionarytechniques import ComposableEvolutionaryTechnique -from opentuner.search import manipulator - -def faked_random(nums): - f = fake_random(nums) - def inner(*args, **kwargs): - return f.next() - return inner - -def fake_random(nums): - i = 0 - while True: - yield nums[i] - i = (i+1) % len(nums) - -class EmptyComposableEvolutionaryTechnique(ComposableEvolutionaryTechnique): - def __init__(self, *pargs, **kwargs): - super(EmptyComposableEvolutionaryTechnique, self).__init__(*pargs, **kwargs) - - def minimum_number_of_parents(self): - return 4 - - def get_parents(self, population): - cfg = self.manipulator.copy(population[0].config) - - return [cfg] - - def update_population(self, config, population): - # replace the oldest configuration if the new one is better. 
- population[0].config = config - - return population - -class ComposableSearchTechniqueTests(unittest.TestCase): - - def setUp(self): - self.operator_map = {} - ComposableEvolutionaryTechnique.add_to_map(self.operator_map, - manipulator.PermutationParameter, - "op3_cross", xchoice='op3_cross_CX') - ComposableEvolutionaryTechnique.add_to_map(self.operator_map, - "FloatArray", - "op3_cross", strength=0.4) - self.technique = EmptyComposableEvolutionaryTechnique(operator_map = self.operator_map) - - def test_add_to_map(self): - op_map = {} - op_map[manipulator.PermutationParameter] = {'op_name': 'op3_cross', - 'args': (), - 'kwargs': {'xchoice': 'op3_cross_CX'}} - op_map[manipulator.FloatArray] = {'op_name': 'op3_cross', - 'args': (), - 'kwargs': {'strength': 0.4}} - self.assertDictEqual(self.operator_map, op_map) - - def test_get_default_oeprator(self): - default = self.technique.get_default_operator(manipulator.PermutationParameter) - self.assertDictEqual(default, {'op_name': 'op1_nop', 'args': [], 'kwargs': {}}) - - - def test_get_operator(self): - default = self.technique.get_operator(manipulator.IntegerParameter) - self.assertDictEqual(default, {'op_name': 'op1_nop', 'args': [], 'kwargs': {}}) - - default = self.technique.get_operator(manipulator.PermutationParameter) - self.assertDictEqual(default, {'op_name': 'op3_cross','args': (),'kwargs': {'xchoice': 'op3_cross_CX'}}) - - @mock.patch('opentuner.search.manipulator.PermutationParameter.op3_cross') - def test_apply_operator(self, op3_cross_func): - param_instance = manipulator.PermutationParameter('temp', [1,2,3,4,5]) - self.technique.apply_operator(param_instance, ['p1', 'p2', 'p3', 'p4']) - op3_cross_func.assert_called_once_with('p1', 'p2', 'p3', xchoice='op3_cross_CX') - -#TODO tests for RandomThreeParentsComposableTechnique diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/venv-bootstrap.py b/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/venv-bootstrap.py deleted file mode 100755 index 
6d6ad0113b72ffe5610b28ac2717442cba6eff8c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/opentuner/venv-bootstrap.py +++ /dev/null @@ -1,2611 +0,0 @@ -#!/usr/bin/env python -## WARNING: This file is generated -#!/usr/bin/env python -"""Create a "virtual" Python installation -""" - -# If you change the version here, change it in setup.py -# and docs/conf.py as well. -__version__ = "1.9.1" # following best practices -virtualenv_version = __version__ # legacy, again - -import base64 -import sys -import os -import codecs -import optparse -import re -import shutil -import logging -import tempfile -import zlib -import errno -import glob -import distutils.sysconfig -from distutils.util import strtobool -import struct -import subprocess - -if sys.version_info < (2, 6): - print('ERROR: %s' % sys.exc_info()[1]) - print('ERROR: this script requires Python 2.6 or greater.') - sys.exit(101) - -try: - set -except NameError: - from sets import Set as set -try: - basestring -except NameError: - basestring = str - -try: - import ConfigParser -except ImportError: - import configparser as ConfigParser - -join = os.path.join -py_version = 'python%s.%s' % (sys.version_info[0], sys.version_info[1]) - -is_jython = sys.platform.startswith('java') -is_pypy = hasattr(sys, 'pypy_version_info') -is_win = (sys.platform == 'win32') -is_cygwin = (sys.platform == 'cygwin') -is_darwin = (sys.platform == 'darwin') -abiflags = getattr(sys, 'abiflags', '') - -user_dir = os.path.expanduser('~') -if is_win: - default_storage_dir = os.path.join(user_dir, 'virtualenv') -else: - default_storage_dir = os.path.join(user_dir, '.virtualenv') -default_config_file = os.path.join(default_storage_dir, 'virtualenv.ini') - -if is_pypy: - expected_exe = 'pypy' -elif is_jython: - expected_exe = 'jython' -else: - expected_exe = 'python' - - -REQUIRED_MODULES = ['os', 'posix', 'posixpath', 'nt', 'ntpath', 'genericpath', - 'fnmatch', 'locale', 'encodings', 'codecs', - 'stat', 
'UserDict', 'readline', 'copy_reg', 'types', - 're', 'sre', 'sre_parse', 'sre_constants', 'sre_compile', - 'zlib'] - -REQUIRED_FILES = ['lib-dynload', 'config'] - -majver, minver = sys.version_info[:2] -if majver == 2: - if minver >= 6: - REQUIRED_MODULES.extend(['warnings', 'linecache', '_abcoll', 'abc']) - if minver >= 7: - REQUIRED_MODULES.extend(['_weakrefset']) - if minver <= 3: - REQUIRED_MODULES.extend(['sets', '__future__']) -elif majver == 3: - # Some extra modules are needed for Python 3, but different ones - # for different versions. - REQUIRED_MODULES.extend(['_abcoll', 'warnings', 'linecache', 'abc', 'io', - '_weakrefset', 'copyreg', 'tempfile', 'random', - '__future__', 'collections', 'keyword', 'tarfile', - 'shutil', 'struct', 'copy', 'tokenize', 'token', - 'functools', 'heapq', 'bisect', 'weakref', - 'reprlib']) - if minver >= 2: - REQUIRED_FILES[-1] = 'config-%s' % majver - if minver == 3: - import sysconfig - platdir = sysconfig.get_config_var('PLATDIR') - REQUIRED_FILES.append(platdir) - # The whole list of 3.3 modules is reproduced below - the current - # uncommented ones are required for 3.3 as of now, but more may be - # added as 3.3 development continues. 
- REQUIRED_MODULES.extend([ - #"aifc", - #"antigravity", - #"argparse", - #"ast", - #"asynchat", - #"asyncore", - "base64", - #"bdb", - #"binhex", - #"bisect", - #"calendar", - #"cgi", - #"cgitb", - #"chunk", - #"cmd", - #"codeop", - #"code", - #"colorsys", - #"_compat_pickle", - #"compileall", - #"concurrent", - #"configparser", - #"contextlib", - #"cProfile", - #"crypt", - #"csv", - #"ctypes", - #"curses", - #"datetime", - #"dbm", - #"decimal", - #"difflib", - #"dis", - #"doctest", - #"dummy_threading", - "_dummy_thread", - #"email", - #"filecmp", - #"fileinput", - #"formatter", - #"fractions", - #"ftplib", - #"functools", - #"getopt", - #"getpass", - #"gettext", - #"glob", - #"gzip", - "hashlib", - #"heapq", - "hmac", - #"html", - #"http", - #"idlelib", - #"imaplib", - #"imghdr", - "imp", - "importlib", - #"inspect", - #"json", - #"lib2to3", - #"logging", - #"macpath", - #"macurl2path", - #"mailbox", - #"mailcap", - #"_markupbase", - #"mimetypes", - #"modulefinder", - #"multiprocessing", - #"netrc", - #"nntplib", - #"nturl2path", - #"numbers", - #"opcode", - #"optparse", - #"os2emxpath", - #"pdb", - #"pickle", - #"pickletools", - #"pipes", - #"pkgutil", - #"platform", - #"plat-linux2", - #"plistlib", - #"poplib", - #"pprint", - #"profile", - #"pstats", - #"pty", - #"pyclbr", - #"py_compile", - #"pydoc_data", - #"pydoc", - #"_pyio", - #"queue", - #"quopri", - #"reprlib", - "rlcompleter", - #"runpy", - #"sched", - #"shelve", - #"shlex", - #"smtpd", - #"smtplib", - #"sndhdr", - #"socket", - #"socketserver", - #"sqlite3", - #"ssl", - #"stringprep", - #"string", - #"_strptime", - #"subprocess", - #"sunau", - #"symbol", - #"symtable", - #"sysconfig", - #"tabnanny", - #"telnetlib", - #"test", - #"textwrap", - #"this", - #"_threading_local", - #"threading", - #"timeit", - #"tkinter", - #"tokenize", - #"token", - #"traceback", - #"trace", - #"tty", - #"turtledemo", - #"turtle", - #"unittest", - #"urllib", - #"uuid", - #"uu", - #"wave", - #"weakref", - #"webbrowser", - 
#"wsgiref", - #"xdrlib", - #"xml", - #"xmlrpc", - #"zipfile", - ]) - -if is_pypy: - # these are needed to correctly display the exceptions that may happen - # during the bootstrap - REQUIRED_MODULES.extend(['traceback', 'linecache']) - -class Logger(object): - - """ - Logging object for use in command-line script. Allows ranges of - levels, to avoid some redundancy of displayed information. - """ - - DEBUG = logging.DEBUG - INFO = logging.INFO - NOTIFY = (logging.INFO+logging.WARN)/2 - WARN = WARNING = logging.WARN - ERROR = logging.ERROR - FATAL = logging.FATAL - - LEVELS = [DEBUG, INFO, NOTIFY, WARN, ERROR, FATAL] - - def __init__(self, consumers): - self.consumers = consumers - self.indent = 0 - self.in_progress = None - self.in_progress_hanging = False - - def debug(self, msg, *args, **kw): - self.log(self.DEBUG, msg, *args, **kw) - def info(self, msg, *args, **kw): - self.log(self.INFO, msg, *args, **kw) - def notify(self, msg, *args, **kw): - self.log(self.NOTIFY, msg, *args, **kw) - def warn(self, msg, *args, **kw): - self.log(self.WARN, msg, *args, **kw) - def error(self, msg, *args, **kw): - self.log(self.ERROR, msg, *args, **kw) - def fatal(self, msg, *args, **kw): - self.log(self.FATAL, msg, *args, **kw) - def log(self, level, msg, *args, **kw): - if args: - if kw: - raise TypeError( - "You may give positional or keyword arguments, not both") - args = args or kw - rendered = None - for consumer_level, consumer in self.consumers: - if self.level_matches(level, consumer_level): - if (self.in_progress_hanging - and consumer in (sys.stdout, sys.stderr)): - self.in_progress_hanging = False - sys.stdout.write('\n') - sys.stdout.flush() - if rendered is None: - if args: - rendered = msg % args - else: - rendered = msg - rendered = ' '*self.indent + rendered - if hasattr(consumer, 'write'): - consumer.write(rendered+'\n') - else: - consumer(rendered) - - def start_progress(self, msg): - assert not self.in_progress, ( - "Tried to start_progress(%r) while 
in_progress %r" - % (msg, self.in_progress)) - if self.level_matches(self.NOTIFY, self._stdout_level()): - sys.stdout.write(msg) - sys.stdout.flush() - self.in_progress_hanging = True - else: - self.in_progress_hanging = False - self.in_progress = msg - - def end_progress(self, msg='done.'): - assert self.in_progress, ( - "Tried to end_progress without start_progress") - if self.stdout_level_matches(self.NOTIFY): - if not self.in_progress_hanging: - # Some message has been printed out since start_progress - sys.stdout.write('...' + self.in_progress + msg + '\n') - sys.stdout.flush() - else: - sys.stdout.write(msg + '\n') - sys.stdout.flush() - self.in_progress = None - self.in_progress_hanging = False - - def show_progress(self): - """If we are in a progress scope, and no log messages have been - shown, write out another '.'""" - if self.in_progress_hanging: - sys.stdout.write('.') - sys.stdout.flush() - - def stdout_level_matches(self, level): - """Returns true if a message at this level will go to stdout""" - return self.level_matches(level, self._stdout_level()) - - def _stdout_level(self): - """Returns the level that stdout runs at""" - for level, consumer in self.consumers: - if consumer is sys.stdout: - return level - return self.FATAL - - def level_matches(self, level, consumer_level): - """ - >>> l = Logger([]) - >>> l.level_matches(3, 4) - False - >>> l.level_matches(3, 2) - True - >>> l.level_matches(slice(None, 3), 3) - False - >>> l.level_matches(slice(None, 3), 2) - True - >>> l.level_matches(slice(1, 3), 1) - True - >>> l.level_matches(slice(2, 3), 1) - False - """ - if isinstance(level, slice): - start, stop = level.start, level.stop - if start is not None and start > consumer_level: - return False - if stop is not None and stop <= consumer_level: - return False - return True - else: - return level >= consumer_level - - #@classmethod - def level_for_integer(cls, level): - levels = cls.LEVELS - if level < 0: - return levels[0] - if level >= 
len(levels): - return levels[-1] - return levels[level] - - level_for_integer = classmethod(level_for_integer) - -# create a silent logger just to prevent this from being undefined -# will be overridden with requested verbosity main() is called. -logger = Logger([(Logger.LEVELS[-1], sys.stdout)]) - -def mkdir(path): - if not os.path.exists(path): - logger.info('Creating %s', path) - os.makedirs(path) - else: - logger.info('Directory %s already exists', path) - -def copyfileordir(src, dest): - if os.path.isdir(src): - shutil.copytree(src, dest, True) - else: - shutil.copy2(src, dest) - -def copyfile(src, dest, symlink=True): - if not os.path.exists(src): - # Some bad symlink in the src - logger.warn('Cannot find file %s (bad symlink)', src) - return - if os.path.exists(dest): - logger.debug('File %s already exists', dest) - return - if not os.path.exists(os.path.dirname(dest)): - logger.info('Creating parent directories for %s' % os.path.dirname(dest)) - os.makedirs(os.path.dirname(dest)) - if not os.path.islink(src): - srcpath = os.path.abspath(src) - else: - srcpath = os.readlink(src) - if symlink and hasattr(os, 'symlink') and not is_win: - logger.info('Symlinking %s', dest) - try: - os.symlink(srcpath, dest) - except (OSError, NotImplementedError): - logger.info('Symlinking failed, copying to %s', dest) - copyfileordir(src, dest) - else: - logger.info('Copying to %s', dest) - copyfileordir(src, dest) - -def writefile(dest, content, overwrite=True): - if not os.path.exists(dest): - logger.info('Writing %s', dest) - f = open(dest, 'wb') - f.write(content.encode('utf-8')) - f.close() - return - else: - f = open(dest, 'rb') - c = f.read() - f.close() - if c != content.encode("utf-8"): - if not overwrite: - logger.notify('File %s exists with different content; not overwriting', dest) - return - logger.notify('Overwriting %s with new content', dest) - f = open(dest, 'wb') - f.write(content.encode('utf-8')) - f.close() - else: - logger.info('Content %s already in 
place', dest) - -def rmtree(dir): - if os.path.exists(dir): - logger.notify('Deleting tree %s', dir) - shutil.rmtree(dir) - else: - logger.info('Do not need to delete %s; already gone', dir) - -def make_exe(fn): - if hasattr(os, 'chmod'): - oldmode = os.stat(fn).st_mode & 0xFFF # 0o7777 - newmode = (oldmode | 0x16D) & 0xFFF # 0o555, 0o7777 - os.chmod(fn, newmode) - logger.info('Changed mode of %s to %s', fn, oct(newmode)) - -def _find_file(filename, dirs): - for dir in reversed(dirs): - files = glob.glob(os.path.join(dir, filename)) - if files and os.path.isfile(files[0]): - return True, files[0] - return False, filename - -def _install_req(py_executable, unzip=False, distribute=False, - search_dirs=None, never_download=False): - - if search_dirs is None: - search_dirs = file_search_dirs() - - if not distribute: - egg_path = 'setuptools-*-py%s.egg' % sys.version[:3] - found, egg_path = _find_file(egg_path, search_dirs) - project_name = 'setuptools' - bootstrap_script = EZ_SETUP_PY - tgz_path = None - else: - # Look for a distribute egg (these are not distributed by default, - # but can be made available by the user) - egg_path = 'distribute-*-py%s.egg' % sys.version[:3] - found, egg_path = _find_file(egg_path, search_dirs) - project_name = 'distribute' - if found: - tgz_path = None - bootstrap_script = DISTRIBUTE_FROM_EGG_PY - else: - # Fall back to sdist - # NB: egg_path is not None iff tgz_path is None - # iff bootstrap_script is a generic setup script accepting - # the standard arguments. 
- egg_path = None - tgz_path = 'distribute-*.tar.gz' - found, tgz_path = _find_file(tgz_path, search_dirs) - bootstrap_script = DISTRIBUTE_SETUP_PY - - if is_jython and os._name == 'nt': - # Jython's .bat sys.executable can't handle a command line - # argument with newlines - fd, ez_setup = tempfile.mkstemp('.py') - os.write(fd, bootstrap_script) - os.close(fd) - cmd = [py_executable, ez_setup] - else: - cmd = [py_executable, '-c', bootstrap_script] - if unzip and egg_path: - cmd.append('--always-unzip') - env = {} - remove_from_env = ['__PYVENV_LAUNCHER__'] - if logger.stdout_level_matches(logger.DEBUG) and egg_path: - cmd.append('-v') - - old_chdir = os.getcwd() - if egg_path is not None and os.path.exists(egg_path): - logger.info('Using existing %s egg: %s' % (project_name, egg_path)) - cmd.append(egg_path) - if os.environ.get('PYTHONPATH'): - env['PYTHONPATH'] = egg_path + os.path.pathsep + os.environ['PYTHONPATH'] - else: - env['PYTHONPATH'] = egg_path - elif tgz_path is not None and os.path.exists(tgz_path): - # Found a tgz source dist, let's chdir - logger.info('Using existing %s egg: %s' % (project_name, tgz_path)) - os.chdir(os.path.dirname(tgz_path)) - # in this case, we want to be sure that PYTHONPATH is unset (not - # just empty, really unset), else CPython tries to import the - # site.py that it's in virtualenv_support - remove_from_env.append('PYTHONPATH') - elif never_download: - logger.fatal("Can't find any local distributions of %s to install " - "and --never-download is set. Either re-run virtualenv " - "without the --never-download option, or place a %s " - "distribution (%s) in one of these " - "locations: %r" % (project_name, project_name, - egg_path or tgz_path, - search_dirs)) - sys.exit(1) - elif egg_path: - logger.info('No %s egg found; downloading' % project_name) - cmd.extend(['--always-copy', '-U', project_name]) - else: - logger.info('No %s tgz found; downloading' % project_name) - logger.start_progress('Installing %s...' 
% project_name) - logger.indent += 2 - cwd = None - if project_name == 'distribute': - env['DONT_PATCH_SETUPTOOLS'] = 'true' - - def _filter_ez_setup(line): - return filter_ez_setup(line, project_name) - - if not os.access(os.getcwd(), os.W_OK): - cwd = tempfile.mkdtemp() - if tgz_path is not None and os.path.exists(tgz_path): - # the current working dir is hostile, let's copy the - # tarball to a temp dir - target = os.path.join(cwd, os.path.split(tgz_path)[-1]) - shutil.copy(tgz_path, target) - try: - call_subprocess(cmd, show_stdout=False, - filter_stdout=_filter_ez_setup, - extra_env=env, - remove_from_env=remove_from_env, - cwd=cwd) - finally: - logger.indent -= 2 - logger.end_progress() - if cwd is not None: - shutil.rmtree(cwd) - if os.getcwd() != old_chdir: - os.chdir(old_chdir) - if is_jython and os._name == 'nt': - os.remove(ez_setup) - -def file_search_dirs(): - here = os.path.dirname(os.path.abspath(__file__)) - dirs = ['.', here, - join(here, 'virtualenv_support')] - if os.path.splitext(os.path.dirname(__file__))[0] != 'virtualenv': - # Probably some boot script; just in case virtualenv is installed... 
- try: - import virtualenv - except ImportError: - pass - else: - dirs.append(os.path.join(os.path.dirname(virtualenv.__file__), 'virtualenv_support')) - return [d for d in dirs if os.path.isdir(d)] - -def install_setuptools(py_executable, unzip=False, - search_dirs=None, never_download=False): - _install_req(py_executable, unzip, - search_dirs=search_dirs, never_download=never_download) - -def install_distribute(py_executable, unzip=False, - search_dirs=None, never_download=False): - _install_req(py_executable, unzip, distribute=True, - search_dirs=search_dirs, never_download=never_download) - -_pip_re = re.compile(r'^pip-.*(zip|tar.gz|tar.bz2|tgz|tbz)$', re.I) -def install_pip(py_executable, search_dirs=None, never_download=False): - if search_dirs is None: - search_dirs = file_search_dirs() - - filenames = [] - for dir in search_dirs: - filenames.extend([join(dir, fn) for fn in os.listdir(dir) - if _pip_re.search(fn)]) - filenames = [(os.path.basename(filename).lower(), i, filename) for i, filename in enumerate(filenames)] - filenames.sort() - filenames = [filename for basename, i, filename in filenames] - if not filenames: - filename = 'pip' - else: - filename = filenames[-1] - easy_install_script = 'easy_install' - if is_win: - easy_install_script = 'easy_install-script.py' - # There's two subtle issues here when invoking easy_install. - # 1. On unix-like systems the easy_install script can *only* be executed - # directly if its full filesystem path is no longer than 78 characters. - # 2. A work around to [1] is to use the `python path/to/easy_install foo` - # pattern, but that breaks if the path contains non-ASCII characters, as - # you can't put the file encoding declaration before the shebang line. 
- # The solution is to use Python's -x flag to skip the first line of the - # script (and any ASCII decoding errors that may have occurred in that line) - cmd = [py_executable, '-x', join(os.path.dirname(py_executable), easy_install_script), filename] - # jython and pypy don't yet support -x - if is_jython or is_pypy: - cmd.remove('-x') - if filename == 'pip': - if never_download: - logger.fatal("Can't find any local distributions of pip to install " - "and --never-download is set. Either re-run virtualenv " - "without the --never-download option, or place a pip " - "source distribution (zip/tar.gz/tar.bz2) in one of these " - "locations: %r" % search_dirs) - sys.exit(1) - logger.info('Installing pip from network...') - else: - logger.info('Installing existing %s distribution: %s' % ( - os.path.basename(filename), filename)) - logger.start_progress('Installing pip...') - logger.indent += 2 - def _filter_setup(line): - return filter_ez_setup(line, 'pip') - try: - call_subprocess(cmd, show_stdout=False, - filter_stdout=_filter_setup) - finally: - logger.indent -= 2 - logger.end_progress() - -def filter_ez_setup(line, project_name='setuptools'): - if not line.strip(): - return Logger.DEBUG - if project_name == 'distribute': - for prefix in ('Extracting', 'Now working', 'Installing', 'Before', - 'Scanning', 'Setuptools', 'Egg', 'Already', - 'running', 'writing', 'reading', 'installing', - 'creating', 'copying', 'byte-compiling', 'removing', - 'Processing'): - if line.startswith(prefix): - return Logger.DEBUG - return Logger.DEBUG - for prefix in ['Reading ', 'Best match', 'Processing setuptools', - 'Copying setuptools', 'Adding setuptools', - 'Installing ', 'Installed ']: - if line.startswith(prefix): - return Logger.DEBUG - return Logger.INFO - - -class UpdatingDefaultsHelpFormatter(optparse.IndentedHelpFormatter): - """ - Custom help formatter for use in ConfigOptionParser that updates - the defaults before expanding them, allowing them to show up correctly - in the 
help listing - """ - def expand_default(self, option): - if self.parser is not None: - self.parser.update_defaults(self.parser.defaults) - return optparse.IndentedHelpFormatter.expand_default(self, option) - - -class ConfigOptionParser(optparse.OptionParser): - """ - Custom option parser which updates its defaults by by checking the - configuration files and environmental variables - """ - def __init__(self, *args, **kwargs): - self.config = ConfigParser.RawConfigParser() - self.files = self.get_config_files() - self.config.read(self.files) - optparse.OptionParser.__init__(self, *args, **kwargs) - - def get_config_files(self): - config_file = os.environ.get('VIRTUALENV_CONFIG_FILE', False) - if config_file and os.path.exists(config_file): - return [config_file] - return [default_config_file] - - def update_defaults(self, defaults): - """ - Updates the given defaults with values from the config files and - the environ. Does a little special handling for certain types of - options (lists). - """ - # Then go and look for the other sources of configuration: - config = {} - # 1. config files - config.update(dict(self.get_config_section('virtualenv'))) - # 2. 
environmental variables - config.update(dict(self.get_environ_vars())) - # Then set the options with those values - for key, val in config.items(): - key = key.replace('_', '-') - if not key.startswith('--'): - key = '--%s' % key # only prefer long opts - option = self.get_option(key) - if option is not None: - # ignore empty values - if not val: - continue - # handle multiline configs - if option.action == 'append': - val = val.split() - else: - option.nargs = 1 - if option.action == 'store_false': - val = not strtobool(val) - elif option.action in ('store_true', 'count'): - val = strtobool(val) - try: - val = option.convert_value(key, val) - except optparse.OptionValueError: - e = sys.exc_info()[1] - print("An error occured during configuration: %s" % e) - sys.exit(3) - defaults[option.dest] = val - return defaults - - def get_config_section(self, name): - """ - Get a section of a configuration - """ - if self.config.has_section(name): - return self.config.items(name) - return [] - - def get_environ_vars(self, prefix='VIRTUALENV_'): - """ - Returns a generator with all environmental vars with prefix VIRTUALENV - """ - for key, val in os.environ.items(): - if key.startswith(prefix): - yield (key.replace(prefix, '').lower(), val) - - def get_default_values(self): - """ - Overridding to make updating the defaults after instantiation of - the option parser possible, update_defaults() does the dirty work. - """ - if not self.process_default_values: - # Old, pre-Optik 1.5 behaviour. 
- return optparse.Values(self.defaults) - - defaults = self.update_defaults(self.defaults.copy()) # ours - for option in self._get_all_options(): - default = defaults.get(option.dest) - if isinstance(default, basestring): - opt_str = option.get_opt_string() - defaults[option.dest] = option.check_value(opt_str, default) - return optparse.Values(defaults) - - -def main(): - parser = ConfigOptionParser( - version=virtualenv_version, - usage="%prog [OPTIONS] DEST_DIR", - formatter=UpdatingDefaultsHelpFormatter()) - - parser.add_option( - '-v', '--verbose', - action='count', - dest='verbose', - default=0, - help="Increase verbosity") - - parser.add_option( - '-q', '--quiet', - action='count', - dest='quiet', - default=0, - help='Decrease verbosity') - - parser.add_option( - '-p', '--python', - dest='python', - metavar='PYTHON_EXE', - help='The Python interpreter to use, e.g., --python=python2.5 will use the python2.5 ' - 'interpreter to create the new environment. The default is the interpreter that ' - 'virtualenv was installed with (%s)' % sys.executable) - - parser.add_option( - '--clear', - dest='clear', - action='store_true', - help="Clear out the non-root install and start from scratch") - - parser.set_defaults(system_site_packages=False) - parser.add_option( - '--no-site-packages', - dest='system_site_packages', - action='store_false', - help="Don't give access to the global site-packages dir to the " - "virtual environment (default)") - - parser.add_option( - '--system-site-packages', - dest='system_site_packages', - action='store_true', - help="Give access to the global site-packages dir to the " - "virtual environment") - - parser.add_option( - '--unzip-setuptools', - dest='unzip_setuptools', - action='store_true', - help="Unzip Setuptools or Distribute when installing it") - - parser.add_option( - '--relocatable', - dest='relocatable', - action='store_true', - help='Make an EXISTING virtualenv environment relocatable. 
' - 'This fixes up scripts and makes all .pth files relative') - - parser.add_option( - '--distribute', '--use-distribute', # the second option is for legacy reasons here. Hi Kenneth! - dest='use_distribute', - action='store_true', - help='Use Distribute instead of Setuptools. Set environ variable ' - 'VIRTUALENV_DISTRIBUTE to make it the default ') - - parser.add_option( - '--no-setuptools', - dest='no_setuptools', - action='store_true', - help='Do not install distribute/setuptools (or pip) ' - 'in the new virtualenv.') - - parser.add_option( - '--no-pip', - dest='no_pip', - action='store_true', - help='Do not install pip in the new virtualenv.') - - parser.add_option( - '--setuptools', - dest='use_distribute', - action='store_false', - help='Use Setuptools instead of Distribute. Set environ variable ' - 'VIRTUALENV_SETUPTOOLS to make it the default ') - - # Set this to True to use distribute by default, even in Python 2. - parser.set_defaults(use_distribute=False) - - default_search_dirs = file_search_dirs() - parser.add_option( - '--extra-search-dir', - dest="search_dirs", - action="append", - default=default_search_dirs, - help="Directory to look for setuptools/distribute/pip distributions in. " - "You can add any number of additional --extra-search-dir paths.") - - parser.add_option( - '--never-download', - dest="never_download", - action="store_true", - help="Never download anything from the network. 
Instead, virtualenv will fail " - "if local distributions of setuptools/distribute/pip are not present.") - - parser.add_option( - '--prompt', - dest='prompt', - help='Provides an alternative prompt prefix for this environment') - - if 'extend_parser' in globals(): - extend_parser(parser) - - options, args = parser.parse_args() - - global logger - - if 'adjust_options' in globals(): - adjust_options(options, args) - - verbosity = options.verbose - options.quiet - logger = Logger([(Logger.level_for_integer(2 - verbosity), sys.stdout)]) - - if options.python and not os.environ.get('VIRTUALENV_INTERPRETER_RUNNING'): - env = os.environ.copy() - interpreter = resolve_interpreter(options.python) - if interpreter == sys.executable: - logger.warn('Already using interpreter %s' % interpreter) - else: - logger.notify('Running virtualenv with interpreter %s' % interpreter) - env['VIRTUALENV_INTERPRETER_RUNNING'] = 'true' - file = __file__ - if file.endswith('.pyc'): - file = file[:-1] - popen = subprocess.Popen([interpreter, file] + sys.argv[1:], env=env) - raise SystemExit(popen.wait()) - - # Force --distribute on Python 3, since setuptools is not available. - if majver > 2: - options.use_distribute = True - - if os.environ.get('PYTHONDONTWRITEBYTECODE') and not options.use_distribute: - print( - "The PYTHONDONTWRITEBYTECODE environment variable is " - "not compatible with setuptools. 
Either use --distribute " - "or unset PYTHONDONTWRITEBYTECODE.") - sys.exit(2) - if not args: - print('You must provide a DEST_DIR') - parser.print_help() - sys.exit(2) - if len(args) > 1: - print('There must be only one argument: DEST_DIR (you gave %s)' % ( - ' '.join(args))) - parser.print_help() - sys.exit(2) - - home_dir = args[0] - - if os.environ.get('WORKING_ENV'): - logger.fatal('ERROR: you cannot run virtualenv while in a workingenv') - logger.fatal('Please deactivate your workingenv, then re-run this script') - sys.exit(3) - - if 'PYTHONHOME' in os.environ: - logger.warn('PYTHONHOME is set. You *must* activate the virtualenv before using it') - del os.environ['PYTHONHOME'] - - if options.relocatable: - make_environment_relocatable(home_dir) - return - - create_environment(home_dir, - site_packages=options.system_site_packages, - clear=options.clear, - unzip_setuptools=options.unzip_setuptools, - use_distribute=options.use_distribute, - prompt=options.prompt, - search_dirs=options.search_dirs, - never_download=options.never_download, - no_setuptools=options.no_setuptools, - no_pip=options.no_pip) - if 'after_install' in globals(): - after_install(options, home_dir) - -def call_subprocess(cmd, show_stdout=True, - filter_stdout=None, cwd=None, - raise_on_returncode=True, extra_env=None, - remove_from_env=None): - cmd_parts = [] - for part in cmd: - if len(part) > 45: - part = part[:20]+"..."+part[-20:] - if ' ' in part or '\n' in part or '"' in part or "'" in part: - part = '"%s"' % part.replace('"', '\\"') - if hasattr(part, 'decode'): - try: - part = part.decode(sys.getdefaultencoding()) - except UnicodeDecodeError: - part = part.decode(sys.getfilesystemencoding()) - cmd_parts.append(part) - cmd_desc = ' '.join(cmd_parts) - if show_stdout: - stdout = None - else: - stdout = subprocess.PIPE - logger.debug("Running command %s" % cmd_desc) - if extra_env or remove_from_env: - env = os.environ.copy() - if extra_env: - env.update(extra_env) - if 
remove_from_env: - for varname in remove_from_env: - env.pop(varname, None) - else: - env = None - try: - proc = subprocess.Popen( - cmd, stderr=subprocess.STDOUT, stdin=None, stdout=stdout, - cwd=cwd, env=env) - except Exception: - e = sys.exc_info()[1] - logger.fatal( - "Error %s while executing command %s" % (e, cmd_desc)) - raise - all_output = [] - if stdout is not None: - stdout = proc.stdout - encoding = sys.getdefaultencoding() - fs_encoding = sys.getfilesystemencoding() - while 1: - line = stdout.readline() - try: - line = line.decode(encoding) - except UnicodeDecodeError: - line = line.decode(fs_encoding) - if not line: - break - line = line.rstrip() - all_output.append(line) - if filter_stdout: - level = filter_stdout(line) - if isinstance(level, tuple): - level, line = level - logger.log(level, line) - if not logger.stdout_level_matches(level): - logger.show_progress() - else: - logger.info(line) - else: - proc.communicate() - proc.wait() - if proc.returncode: - if raise_on_returncode: - if all_output: - logger.notify('Complete output from command %s:' % cmd_desc) - logger.notify('\n'.join(all_output) + '\n----------------------------------------') - raise OSError( - "Command %s failed with error code %s" - % (cmd_desc, proc.returncode)) - else: - logger.warn( - "Command %s had error code %s" - % (cmd_desc, proc.returncode)) - - -def create_environment(home_dir, site_packages=False, clear=False, - unzip_setuptools=False, use_distribute=False, - prompt=None, search_dirs=None, never_download=False, - no_setuptools=False, no_pip=False): - """ - Creates a new environment in ``home_dir``. - - If ``site_packages`` is true, then the global ``site-packages/`` - directory will be on the path. - - If ``clear`` is true (default False) then the environment will - first be cleared. 
- """ - home_dir, lib_dir, inc_dir, bin_dir = path_locations(home_dir) - - py_executable = os.path.abspath(install_python( - home_dir, lib_dir, inc_dir, bin_dir, - site_packages=site_packages, clear=clear)) - - install_distutils(home_dir) - - if not no_setuptools: - if use_distribute: - install_distribute(py_executable, unzip=unzip_setuptools, - search_dirs=search_dirs, never_download=never_download) - else: - install_setuptools(py_executable, unzip=unzip_setuptools, - search_dirs=search_dirs, never_download=never_download) - - if not no_pip: - install_pip(py_executable, search_dirs=search_dirs, never_download=never_download) - - install_activate(home_dir, bin_dir, prompt) - -def is_executable_file(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - -def path_locations(home_dir): - """Return the path locations for the environment (where libraries are, - where scripts go, etc)""" - # XXX: We'd use distutils.sysconfig.get_python_inc/lib but its - # prefix arg is broken: http://bugs.python.org/issue3386 - if is_win: - # Windows has lots of problems with executables with spaces in - # the name; this function will remove them (using the ~1 - # format): - mkdir(home_dir) - if ' ' in home_dir: - import ctypes - GetShortPathName = ctypes.windll.kernel32.GetShortPathNameW - size = max(len(home_dir)+1, 256) - buf = ctypes.create_unicode_buffer(size) - try: - u = unicode - except NameError: - u = str - ret = GetShortPathName(u(home_dir), buf, size) - if not ret: - print('Error: the path "%s" has a space in it' % home_dir) - print('We could not determine the short pathname for it.') - print('Exiting.') - sys.exit(3) - home_dir = str(buf.value) - lib_dir = join(home_dir, 'Lib') - inc_dir = join(home_dir, 'Include') - bin_dir = join(home_dir, 'Scripts') - if is_jython: - lib_dir = join(home_dir, 'Lib') - inc_dir = join(home_dir, 'Include') - bin_dir = join(home_dir, 'bin') - elif is_pypy: - lib_dir = home_dir - inc_dir = join(home_dir, 'include') - bin_dir = 
join(home_dir, 'bin') - elif not is_win: - lib_dir = join(home_dir, 'lib', py_version) - multiarch_exec = '/usr/bin/multiarch-platform' - if is_executable_file(multiarch_exec): - # In Mageia (2) and Mandriva distros the include dir must be like: - # virtualenv/include/multiarch-x86_64-linux/python2.7 - # instead of being virtualenv/include/python2.7 - p = subprocess.Popen(multiarch_exec, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - # stdout.strip is needed to remove newline character - inc_dir = join(home_dir, 'include', stdout.strip(), py_version + abiflags) - else: - inc_dir = join(home_dir, 'include', py_version + abiflags) - bin_dir = join(home_dir, 'bin') - return home_dir, lib_dir, inc_dir, bin_dir - - -def change_prefix(filename, dst_prefix): - prefixes = [sys.prefix] - - if is_darwin: - prefixes.extend(( - os.path.join("/Library/Python", sys.version[:3], "site-packages"), - os.path.join(sys.prefix, "Extras", "lib", "python"), - os.path.join("~", "Library", "Python", sys.version[:3], "site-packages"), - # Python 2.6 no-frameworks - os.path.join("~", ".local", "lib","python", sys.version[:3], "site-packages"), - # System Python 2.7 on OSX Mountain Lion - os.path.join("~", "Library", "Python", sys.version[:3], "lib", "python", "site-packages"))) - - if hasattr(sys, 'real_prefix'): - prefixes.append(sys.real_prefix) - if hasattr(sys, 'base_prefix'): - prefixes.append(sys.base_prefix) - prefixes = list(map(os.path.expanduser, prefixes)) - prefixes = list(map(os.path.abspath, prefixes)) - # Check longer prefixes first so we don't split in the middle of a filename - prefixes = sorted(prefixes, key=len, reverse=True) - filename = os.path.abspath(filename) - for src_prefix in prefixes: - if filename.startswith(src_prefix): - _, relpath = filename.split(src_prefix, 1) - if src_prefix != os.sep: # sys.prefix == "/" - assert relpath[0] == os.sep - relpath = relpath[1:] - return join(dst_prefix, relpath) - assert False, "Filename 
%s does not start with any of these prefixes: %s" % \ - (filename, prefixes) - -def copy_required_modules(dst_prefix): - import imp - # If we are running under -p, we need to remove the current - # directory from sys.path temporarily here, so that we - # definitely get the modules from the site directory of - # the interpreter we are running under, not the one - # virtualenv.py is installed under (which might lead to py2/py3 - # incompatibility issues) - _prev_sys_path = sys.path - if os.environ.get('VIRTUALENV_INTERPRETER_RUNNING'): - sys.path = sys.path[1:] - try: - for modname in REQUIRED_MODULES: - if modname in sys.builtin_module_names: - logger.info("Ignoring built-in bootstrap module: %s" % modname) - continue - try: - f, filename, _ = imp.find_module(modname) - except ImportError: - logger.info("Cannot import bootstrap module: %s" % modname) - else: - if f is not None: - f.close() - # special-case custom readline.so on OS X, but not for pypy: - if modname == 'readline' and sys.platform == 'darwin' and not ( - is_pypy or filename.endswith(join('lib-dynload', 'readline.so'))): - dst_filename = join(dst_prefix, 'lib', 'python%s' % sys.version[:3], 'readline.so') - else: - dst_filename = change_prefix(filename, dst_prefix) - copyfile(filename, dst_filename) - if filename.endswith('.pyc'): - pyfile = filename[:-1] - if os.path.exists(pyfile): - copyfile(pyfile, dst_filename[:-1]) - finally: - sys.path = _prev_sys_path - - -def subst_path(prefix_path, prefix, home_dir): - prefix_path = os.path.normpath(prefix_path) - prefix = os.path.normpath(prefix) - home_dir = os.path.normpath(home_dir) - if not prefix_path.startswith(prefix): - logger.warn('Path not in prefix %r %r', prefix_path, prefix) - return - return prefix_path.replace(prefix, home_dir, 1) - - -def install_python(home_dir, lib_dir, inc_dir, bin_dir, site_packages, clear): - """Install just the base environment, no distutils patches etc""" - if sys.executable.startswith(bin_dir): - print('Please use the 
*system* python to run this script') - return - - if clear: - rmtree(lib_dir) - ## FIXME: why not delete it? - ## Maybe it should delete everything with #!/path/to/venv/python in it - logger.notify('Not deleting %s', bin_dir) - - if hasattr(sys, 'real_prefix'): - logger.notify('Using real prefix %r' % sys.real_prefix) - prefix = sys.real_prefix - elif hasattr(sys, 'base_prefix'): - logger.notify('Using base prefix %r' % sys.base_prefix) - prefix = sys.base_prefix - else: - prefix = sys.prefix - mkdir(lib_dir) - fix_lib64(lib_dir) - stdlib_dirs = [os.path.dirname(os.__file__)] - if is_win: - stdlib_dirs.append(join(os.path.dirname(stdlib_dirs[0]), 'DLLs')) - elif is_darwin: - stdlib_dirs.append(join(stdlib_dirs[0], 'site-packages')) - if hasattr(os, 'symlink'): - logger.info('Symlinking Python bootstrap modules') - else: - logger.info('Copying Python bootstrap modules') - logger.indent += 2 - try: - # copy required files... - for stdlib_dir in stdlib_dirs: - if not os.path.isdir(stdlib_dir): - continue - for fn in os.listdir(stdlib_dir): - bn = os.path.splitext(fn)[0] - if fn != 'site-packages' and bn in REQUIRED_FILES: - copyfile(join(stdlib_dir, fn), join(lib_dir, fn)) - # ...and modules - copy_required_modules(home_dir) - finally: - logger.indent -= 2 - mkdir(join(lib_dir, 'site-packages')) - import site - site_filename = site.__file__ - if site_filename.endswith('.pyc'): - site_filename = site_filename[:-1] - elif site_filename.endswith('$py.class'): - site_filename = site_filename.replace('$py.class', '.py') - site_filename_dst = change_prefix(site_filename, home_dir) - site_dir = os.path.dirname(site_filename_dst) - writefile(site_filename_dst, SITE_PY) - writefile(join(site_dir, 'orig-prefix.txt'), prefix) - site_packages_filename = join(site_dir, 'no-global-site-packages.txt') - if not site_packages: - writefile(site_packages_filename, '') - - if is_pypy or is_win: - stdinc_dir = join(prefix, 'include') - else: - stdinc_dir = join(prefix, 'include', 
py_version + abiflags) - if os.path.exists(stdinc_dir): - copyfile(stdinc_dir, inc_dir) - else: - logger.debug('No include dir %s' % stdinc_dir) - - platinc_dir = distutils.sysconfig.get_python_inc(plat_specific=1) - if platinc_dir != stdinc_dir: - platinc_dest = distutils.sysconfig.get_python_inc( - plat_specific=1, prefix=home_dir) - if platinc_dir == platinc_dest: - # Do platinc_dest manually due to a CPython bug; - # not http://bugs.python.org/issue3386 but a close cousin - platinc_dest = subst_path(platinc_dir, prefix, home_dir) - if platinc_dest: - # PyPy's stdinc_dir and prefix are relative to the original binary - # (traversing virtualenvs), whereas the platinc_dir is relative to - # the inner virtualenv and ignores the prefix argument. - # This seems more evolved than designed. - copyfile(platinc_dir, platinc_dest) - - # pypy never uses exec_prefix, just ignore it - if sys.exec_prefix != prefix and not is_pypy: - if is_win: - exec_dir = join(sys.exec_prefix, 'lib') - elif is_jython: - exec_dir = join(sys.exec_prefix, 'Lib') - else: - exec_dir = join(sys.exec_prefix, 'lib', py_version) - for fn in os.listdir(exec_dir): - copyfile(join(exec_dir, fn), join(lib_dir, fn)) - - if is_jython: - # Jython has either jython-dev.jar and javalib/ dir, or just - # jython.jar - for name in 'jython-dev.jar', 'javalib', 'jython.jar': - src = join(prefix, name) - if os.path.exists(src): - copyfile(src, join(home_dir, name)) - # XXX: registry should always exist after Jython 2.5rc1 - src = join(prefix, 'registry') - if os.path.exists(src): - copyfile(src, join(home_dir, 'registry'), symlink=False) - copyfile(join(prefix, 'cachedir'), join(home_dir, 'cachedir'), - symlink=False) - - mkdir(bin_dir) - py_executable = join(bin_dir, os.path.basename(sys.executable)) - if 'Python.framework' in prefix: - # OS X framework builds cause validation to break - # https://github.com/pypa/virtualenv/issues/322 - if os.environ.get('__PYVENV_LAUNCHER__'): - os.unsetenv('__PYVENV_LAUNCHER__') 
- if re.search(r'/Python(?:-32|-64)*$', py_executable): - # The name of the python executable is not quite what - # we want, rename it. - py_executable = os.path.join( - os.path.dirname(py_executable), 'python') - - logger.notify('New %s executable in %s', expected_exe, py_executable) - pcbuild_dir = os.path.dirname(sys.executable) - pyd_pth = os.path.join(lib_dir, 'site-packages', 'virtualenv_builddir_pyd.pth') - if is_win and os.path.exists(os.path.join(pcbuild_dir, 'build.bat')): - logger.notify('Detected python running from build directory %s', pcbuild_dir) - logger.notify('Writing .pth file linking to build directory for *.pyd files') - writefile(pyd_pth, pcbuild_dir) - else: - pcbuild_dir = None - if os.path.exists(pyd_pth): - logger.info('Deleting %s (not Windows env or not build directory python)' % pyd_pth) - os.unlink(pyd_pth) - - if sys.executable != py_executable: - ## FIXME: could I just hard link? - executable = sys.executable - shutil.copyfile(executable, py_executable) - make_exe(py_executable) - if is_win or is_cygwin: - pythonw = os.path.join(os.path.dirname(sys.executable), 'pythonw.exe') - if os.path.exists(pythonw): - logger.info('Also created pythonw.exe') - shutil.copyfile(pythonw, os.path.join(os.path.dirname(py_executable), 'pythonw.exe')) - python_d = os.path.join(os.path.dirname(sys.executable), 'python_d.exe') - python_d_dest = os.path.join(os.path.dirname(py_executable), 'python_d.exe') - if os.path.exists(python_d): - logger.info('Also created python_d.exe') - shutil.copyfile(python_d, python_d_dest) - elif os.path.exists(python_d_dest): - logger.info('Removed python_d.exe as it is no longer at the source') - os.unlink(python_d_dest) - # we need to copy the DLL to enforce that windows will load the correct one. - # may not exist if we are cygwin. 
- py_executable_dll = 'python%s%s.dll' % ( - sys.version_info[0], sys.version_info[1]) - py_executable_dll_d = 'python%s%s_d.dll' % ( - sys.version_info[0], sys.version_info[1]) - pythondll = os.path.join(os.path.dirname(sys.executable), py_executable_dll) - pythondll_d = os.path.join(os.path.dirname(sys.executable), py_executable_dll_d) - pythondll_d_dest = os.path.join(os.path.dirname(py_executable), py_executable_dll_d) - if os.path.exists(pythondll): - logger.info('Also created %s' % py_executable_dll) - shutil.copyfile(pythondll, os.path.join(os.path.dirname(py_executable), py_executable_dll)) - if os.path.exists(pythondll_d): - logger.info('Also created %s' % py_executable_dll_d) - shutil.copyfile(pythondll_d, pythondll_d_dest) - elif os.path.exists(pythondll_d_dest): - logger.info('Removed %s as the source does not exist' % pythondll_d_dest) - os.unlink(pythondll_d_dest) - if is_pypy: - # make a symlink python --> pypy-c - python_executable = os.path.join(os.path.dirname(py_executable), 'python') - if sys.platform in ('win32', 'cygwin'): - python_executable += '.exe' - logger.info('Also created executable %s' % python_executable) - copyfile(py_executable, python_executable) - - if is_win: - for name in 'libexpat.dll', 'libpypy.dll', 'libpypy-c.dll', 'libeay32.dll', 'ssleay32.dll', 'sqlite.dll': - src = join(prefix, name) - if os.path.exists(src): - copyfile(src, join(bin_dir, name)) - - if os.path.splitext(os.path.basename(py_executable))[0] != expected_exe: - secondary_exe = os.path.join(os.path.dirname(py_executable), - expected_exe) - py_executable_ext = os.path.splitext(py_executable)[1] - if py_executable_ext == '.exe': - # python2.4 gives an extension of '.4' :P - secondary_exe += py_executable_ext - if os.path.exists(secondary_exe): - logger.warn('Not overwriting existing %s script %s (you must use %s)' - % (expected_exe, secondary_exe, py_executable)) - else: - logger.notify('Also creating executable in %s' % secondary_exe) - 
shutil.copyfile(sys.executable, secondary_exe) - make_exe(secondary_exe) - - if '.framework' in prefix: - if 'Python.framework' in prefix: - logger.debug('MacOSX Python framework detected') - # Make sure we use the the embedded interpreter inside - # the framework, even if sys.executable points to - # the stub executable in ${sys.prefix}/bin - # See http://groups.google.com/group/python-virtualenv/ - # browse_thread/thread/17cab2f85da75951 - original_python = os.path.join( - prefix, 'Resources/Python.app/Contents/MacOS/Python') - if 'EPD' in prefix: - logger.debug('EPD framework detected') - original_python = os.path.join(prefix, 'bin/python') - shutil.copy(original_python, py_executable) - - # Copy the framework's dylib into the virtual - # environment - virtual_lib = os.path.join(home_dir, '.Python') - - if os.path.exists(virtual_lib): - os.unlink(virtual_lib) - copyfile( - os.path.join(prefix, 'Python'), - virtual_lib) - - # And then change the install_name of the copied python executable - try: - mach_o_change(py_executable, - os.path.join(prefix, 'Python'), - '@executable_path/../.Python') - except: - e = sys.exc_info()[1] - logger.warn("Could not call mach_o_change: %s. " - "Trying to call install_name_tool instead." 
% e) - try: - call_subprocess( - ["install_name_tool", "-change", - os.path.join(prefix, 'Python'), - '@executable_path/../.Python', - py_executable]) - except: - logger.fatal("Could not call install_name_tool -- you must " - "have Apple's development tools installed") - raise - - if not is_win: - # Ensure that 'python', 'pythonX' and 'pythonX.Y' all exist - py_exe_version_major = 'python%s' % sys.version_info[0] - py_exe_version_major_minor = 'python%s.%s' % ( - sys.version_info[0], sys.version_info[1]) - py_exe_no_version = 'python' - required_symlinks = [ py_exe_no_version, py_exe_version_major, - py_exe_version_major_minor ] - - py_executable_base = os.path.basename(py_executable) - - if py_executable_base in required_symlinks: - # Don't try to symlink to yourself. - required_symlinks.remove(py_executable_base) - - for pth in required_symlinks: - full_pth = join(bin_dir, pth) - if os.path.exists(full_pth): - os.unlink(full_pth) - os.symlink(py_executable_base, full_pth) - - if is_win and ' ' in py_executable: - # There's a bug with subprocess on Windows when using a first - # argument that has a space in it. 
Instead we have to quote - # the value: - py_executable = '"%s"' % py_executable - # NOTE: keep this check as one line, cmd.exe doesn't cope with line breaks - cmd = [py_executable, '-c', 'import sys;out=sys.stdout;' - 'getattr(out, "buffer", out).write(sys.prefix.encode("utf-8"))'] - logger.info('Testing executable with %s %s "%s"' % tuple(cmd)) - try: - proc = subprocess.Popen(cmd, - stdout=subprocess.PIPE) - proc_stdout, proc_stderr = proc.communicate() - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.EACCES: - logger.fatal('ERROR: The executable %s could not be run: %s' % (py_executable, e)) - sys.exit(100) - else: - raise e - - proc_stdout = proc_stdout.strip().decode("utf-8") - proc_stdout = os.path.normcase(os.path.abspath(proc_stdout)) - norm_home_dir = os.path.normcase(os.path.abspath(home_dir)) - if hasattr(norm_home_dir, 'decode'): - norm_home_dir = norm_home_dir.decode(sys.getfilesystemencoding()) - if proc_stdout != norm_home_dir: - logger.fatal( - 'ERROR: The executable %s is not functioning' % py_executable) - logger.fatal( - 'ERROR: It thinks sys.prefix is %r (should be %r)' - % (proc_stdout, norm_home_dir)) - logger.fatal( - 'ERROR: virtualenv is not compatible with this system or executable') - if is_win: - logger.fatal( - 'Note: some Windows users have reported this error when they ' - 'installed Python for "Only this user" or have multiple ' - 'versions of Python installed. Copying the appropriate ' - 'PythonXX.dll to the virtualenv Scripts/ directory may fix ' - 'this problem.') - sys.exit(100) - else: - logger.info('Got sys.prefix result: %r' % proc_stdout) - - pydistutils = os.path.expanduser('~/.pydistutils.cfg') - if os.path.exists(pydistutils): - logger.notify('Please make sure you remove any previous custom paths from ' - 'your %s file.' 
% pydistutils) - ## FIXME: really this should be calculated earlier - - fix_local_scheme(home_dir) - - if site_packages: - if os.path.exists(site_packages_filename): - logger.info('Deleting %s' % site_packages_filename) - os.unlink(site_packages_filename) - - return py_executable - - -def install_activate(home_dir, bin_dir, prompt=None): - home_dir = os.path.abspath(home_dir) - if is_win or is_jython and os._name == 'nt': - files = { - 'activate.bat': ACTIVATE_BAT, - 'deactivate.bat': DEACTIVATE_BAT, - 'activate.ps1': ACTIVATE_PS, - } - - # MSYS needs paths of the form /c/path/to/file - drive, tail = os.path.splitdrive(home_dir.replace(os.sep, '/')) - home_dir_msys = (drive and "/%s%s" or "%s%s") % (drive[:1], tail) - - # Run-time conditional enables (basic) Cygwin compatibility - home_dir_sh = ("""$(if [ "$OSTYPE" "==" "cygwin" ]; then cygpath -u '%s'; else echo '%s'; fi;)""" % - (home_dir, home_dir_msys)) - files['activate'] = ACTIVATE_SH.replace('__VIRTUAL_ENV__', home_dir_sh) - - else: - files = {'activate': ACTIVATE_SH} - - # suppling activate.fish in addition to, not instead of, the - # bash script support. - files['activate.fish'] = ACTIVATE_FISH - - # same for csh/tcsh support... 
- files['activate.csh'] = ACTIVATE_CSH - - files['activate_this.py'] = ACTIVATE_THIS - if hasattr(home_dir, 'decode'): - home_dir = home_dir.decode(sys.getfilesystemencoding()) - vname = os.path.basename(home_dir) - for name, content in files.items(): - content = content.replace('__VIRTUAL_PROMPT__', prompt or '') - content = content.replace('__VIRTUAL_WINPROMPT__', prompt or '(%s)' % vname) - content = content.replace('__VIRTUAL_ENV__', home_dir) - content = content.replace('__VIRTUAL_NAME__', vname) - content = content.replace('__BIN_NAME__', os.path.basename(bin_dir)) - writefile(os.path.join(bin_dir, name), content) - -def install_distutils(home_dir): - distutils_path = change_prefix(distutils.__path__[0], home_dir) - mkdir(distutils_path) - ## FIXME: maybe this prefix setting should only be put in place if - ## there's a local distutils.cfg with a prefix setting? - home_dir = os.path.abspath(home_dir) - ## FIXME: this is breaking things, removing for now: - #distutils_cfg = DISTUTILS_CFG + "\n[install]\nprefix=%s\n" % home_dir - writefile(os.path.join(distutils_path, '__init__.py'), DISTUTILS_INIT) - writefile(os.path.join(distutils_path, 'distutils.cfg'), DISTUTILS_CFG, overwrite=False) - -def fix_local_scheme(home_dir): - """ - Platforms that use the "posix_local" install scheme (like Ubuntu with - Python 2.7) need to be given an additional "local" location, sigh. - """ - try: - import sysconfig - except ImportError: - pass - else: - if sysconfig._get_default_scheme() == 'posix_local': - local_path = os.path.join(home_dir, 'local') - if not os.path.exists(local_path): - os.mkdir(local_path) - for subdir_name in os.listdir(home_dir): - if subdir_name == 'local': - continue - os.symlink(os.path.abspath(os.path.join(home_dir, subdir_name)), \ - os.path.join(local_path, subdir_name)) - -def fix_lib64(lib_dir): - """ - Some platforms (particularly Gentoo on x64) put things in lib64/pythonX.Y - instead of lib/pythonX.Y. 
If this is such a platform we'll just create a - symlink so lib64 points to lib - """ - if [p for p in distutils.sysconfig.get_config_vars().values() - if isinstance(p, basestring) and 'lib64' in p]: - logger.debug('This system uses lib64; symlinking lib64 to lib') - assert os.path.basename(lib_dir) == 'python%s' % sys.version[:3], ( - "Unexpected python lib dir: %r" % lib_dir) - lib_parent = os.path.dirname(lib_dir) - top_level = os.path.dirname(lib_parent) - lib_dir = os.path.join(top_level, 'lib') - lib64_link = os.path.join(top_level, 'lib64') - assert os.path.basename(lib_parent) == 'lib', ( - "Unexpected parent dir: %r" % lib_parent) - if os.path.lexists(lib64_link): - return - os.symlink('lib', lib64_link) - -def resolve_interpreter(exe): - """ - If the executable given isn't an absolute path, search $PATH for the interpreter - """ - if os.path.abspath(exe) != exe: - paths = os.environ.get('PATH', '').split(os.pathsep) - for path in paths: - if os.path.exists(os.path.join(path, exe)): - exe = os.path.join(path, exe) - break - if not os.path.exists(exe): - logger.fatal('The executable %s (from --python=%s) does not exist' % (exe, exe)) - raise SystemExit(3) - if not is_executable(exe): - logger.fatal('The executable %s (from --python=%s) is not executable' % (exe, exe)) - raise SystemExit(3) - return exe - -def is_executable(exe): - """Checks a file is executable""" - return os.access(exe, os.X_OK) - -############################################################ -## Relocating the environment: - -def make_environment_relocatable(home_dir): - """ - Makes the already-existing environment use relative paths, and takes out - the #!-based environment selection in scripts. 
- """ - home_dir, lib_dir, inc_dir, bin_dir = path_locations(home_dir) - activate_this = os.path.join(bin_dir, 'activate_this.py') - if not os.path.exists(activate_this): - logger.fatal( - 'The environment doesn\'t have a file %s -- please re-run virtualenv ' - 'on this environment to update it' % activate_this) - fixup_scripts(home_dir) - fixup_pth_and_egg_link(home_dir) - ## FIXME: need to fix up distutils.cfg - -OK_ABS_SCRIPTS = ['python', 'python%s' % sys.version[:3], - 'activate', 'activate.bat', 'activate_this.py'] - -def fixup_scripts(home_dir): - # This is what we expect at the top of scripts: - shebang = '#!%s/bin/python' % os.path.normcase(os.path.abspath(home_dir)) - # This is what we'll put: - new_shebang = '#!/usr/bin/env python%s' % sys.version[:3] - if is_win: - bin_suffix = 'Scripts' - else: - bin_suffix = 'bin' - bin_dir = os.path.join(home_dir, bin_suffix) - home_dir, lib_dir, inc_dir, bin_dir = path_locations(home_dir) - for filename in os.listdir(bin_dir): - filename = os.path.join(bin_dir, filename) - if not os.path.isfile(filename): - # ignore subdirs, e.g. .svn ones. - continue - f = open(filename, 'rb') - try: - try: - lines = f.read().decode('utf-8').splitlines() - except UnicodeDecodeError: - # This is probably a binary program instead - # of a script, so just ignore it. 
- continue - finally: - f.close() - if not lines: - logger.warn('Script %s is an empty file' % filename) - continue - if not lines[0].strip().startswith(shebang): - if os.path.basename(filename) in OK_ABS_SCRIPTS: - logger.debug('Cannot make script %s relative' % filename) - elif lines[0].strip() == new_shebang: - logger.info('Script %s has already been made relative' % filename) - else: - logger.warn('Script %s cannot be made relative (it\'s not a normal script that starts with %s)' - % (filename, shebang)) - continue - logger.notify('Making script %s relative' % filename) - script = relative_script([new_shebang] + lines[1:]) - f = open(filename, 'wb') - f.write('\n'.join(script).encode('utf-8')) - f.close() - -def relative_script(lines): - "Return a script that'll work in a relocatable environment." - activate = "import os; activate_this=os.path.join(os.path.dirname(os.path.realpath(__file__)), 'activate_this.py'); execfile(activate_this, dict(__file__=activate_this)); del os, activate_this" - # Find the last future statement in the script. If we insert the activation - # line before a future statement, Python will raise a SyntaxError. - activate_at = None - for idx, line in reversed(list(enumerate(lines))): - if line.split()[:3] == ['from', '__future__', 'import']: - activate_at = idx + 1 - break - if activate_at is None: - # Activate after the shebang. - activate_at = 1 - return lines[:activate_at] + ['', activate, ''] + lines[activate_at:] - -def fixup_pth_and_egg_link(home_dir, sys_path=None): - """Makes .pth and .egg-link files use relative paths""" - home_dir = os.path.normcase(os.path.abspath(home_dir)) - if sys_path is None: - sys_path = sys.path - for path in sys_path: - if not path: - path = '.' 
- if not os.path.isdir(path): - continue - path = os.path.normcase(os.path.abspath(path)) - if not path.startswith(home_dir): - logger.debug('Skipping system (non-environment) directory %s' % path) - continue - for filename in os.listdir(path): - filename = os.path.join(path, filename) - if filename.endswith('.pth'): - if not os.access(filename, os.W_OK): - logger.warn('Cannot write .pth file %s, skipping' % filename) - else: - fixup_pth_file(filename) - if filename.endswith('.egg-link'): - if not os.access(filename, os.W_OK): - logger.warn('Cannot write .egg-link file %s, skipping' % filename) - else: - fixup_egg_link(filename) - -def fixup_pth_file(filename): - lines = [] - prev_lines = [] - f = open(filename) - prev_lines = f.readlines() - f.close() - for line in prev_lines: - line = line.strip() - if (not line or line.startswith('#') or line.startswith('import ') - or os.path.abspath(line) != line): - lines.append(line) - else: - new_value = make_relative_path(filename, line) - if line != new_value: - logger.debug('Rewriting path %s as %s (in %s)' % (line, new_value, filename)) - lines.append(new_value) - if lines == prev_lines: - logger.info('No changes to .pth file %s' % filename) - return - logger.notify('Making paths in .pth file %s relative' % filename) - f = open(filename, 'w') - f.write('\n'.join(lines) + '\n') - f.close() - -def fixup_egg_link(filename): - f = open(filename) - link = f.readline().strip() - f.close() - if os.path.abspath(link) != link: - logger.debug('Link in %s already relative' % filename) - return - new_link = make_relative_path(filename, link) - logger.notify('Rewriting link %s in %s as %s' % (link, filename, new_link)) - f = open(filename, 'w') - f.write(new_link) - f.close() - -def make_relative_path(source, dest, dest_is_directory=True): - """ - Make a filename relative, where the filename is dest, and it is - being referred to from the filename source. - - >>> make_relative_path('/usr/share/something/a-file.pth', - ... 
'/usr/share/another-place/src/Directory') - '../another-place/src/Directory' - >>> make_relative_path('/usr/share/something/a-file.pth', - ... '/home/user/src/Directory') - '../../../home/user/src/Directory' - >>> make_relative_path('/usr/share/a-file.pth', '/usr/share/') - './' - """ - source = os.path.dirname(source) - if not dest_is_directory: - dest_filename = os.path.basename(dest) - dest = os.path.dirname(dest) - dest = os.path.normpath(os.path.abspath(dest)) - source = os.path.normpath(os.path.abspath(source)) - dest_parts = dest.strip(os.path.sep).split(os.path.sep) - source_parts = source.strip(os.path.sep).split(os.path.sep) - while dest_parts and source_parts and dest_parts[0] == source_parts[0]: - dest_parts.pop(0) - source_parts.pop(0) - full_parts = ['..']*len(source_parts) + dest_parts - if not dest_is_directory: - full_parts.append(dest_filename) - if not full_parts: - # Special case for the current directory (otherwise it'd be '') - return './' - return os.path.sep.join(full_parts) - - - -############################################################ -## Bootstrap script creation: - -def create_bootstrap_script(extra_text, python_version=''): - """ - Creates a bootstrap script, which is like this script but with - extend_parser, adjust_options, and after_install hooks. - - This returns a string that (written to disk of course) can be used - as a bootstrap script with your own customizations. The script - will be the standard virtualenv.py script, with your extra text - added (your extra text should be Python code). - - If you include these functions, they will be called: - - ``extend_parser(optparse_parser)``: - You can add or remove options from the parser here. - - ``adjust_options(options, args)``: - You can change options here, or change the args (if you accept - different kinds of arguments, be sure you modify ``args`` so it is - only ``[DEST_DIR]``). 
- - ``after_install(options, home_dir)``: - - After everything is installed, this function is called. This - is probably the function you are most likely to use. An - example would be:: - - def after_install(options, home_dir): - subprocess.call([join(home_dir, 'bin', 'easy_install'), - 'MyPackage']) - subprocess.call([join(home_dir, 'bin', 'my-package-script'), - 'setup', home_dir]) - - This example immediately installs a package, and runs a setup - script from that package. - - If you provide something like ``python_version='2.5'`` then the - script will start with ``#!/usr/bin/env python2.5`` instead of - ``#!/usr/bin/env python``. You can use this when the script must - be run with a particular Python version. - """ - filename = __file__ - if filename.endswith('.pyc'): - filename = filename[:-1] - f = codecs.open(filename, 'r', encoding='utf-8') - content = f.read() - f.close() - py_exe = 'python%s' % python_version - content = (('#!/usr/bin/env %s\n' % py_exe) - + '## WARNING: This file is generated\n' - + content) - return content.replace('##EXT' 'END##', extra_text) - - - -default_target_dir = 'venv' - -pip_install_packages = filter(len, open('requirements.txt').readlines()) - -import os -import subprocess -import sys - -def adjust_options(options, args): - if len(args)==0: - os.chdir(os.path.dirname(__file__)) - args.append(default_target_dir) - -def after_install(options, home_dir): - from os.path import join - pip = join(home_dir, 'bin/pip') - if not os.path.exists(pip): - # on windows - pip = join(home_dir, 'Scripts/pip.exe') - if not os.path.exists(pip): - print "error", pip, "is missing" - if sys.version_info < (2, 7): - subprocess.call([pip, 'install', 'importlib']) - for prog in pip_install_packages: - subprocess.call([pip, 'install', prog]) - - - -def convert(s): - b = base64.b64decode(s.encode('ascii')) - return zlib.decompress(b).decode('utf-8') - -##file site.py -SITE_PY = convert(""" 
-eJzFPf1z2zaWv/OvwMqToZTIdOK0vR2nzo2TOK3v3MTbpLO5dT1aSoIs1hTJEqRl7c3d337vAwAB -kpLtTXdO04klEnh4eHhfeHgPHQwGJ0Uhs7lY5fM6lULJuJwtRRFXSyUWeSmqZVLO94u4rDbwdHYT -X0slqlyojYqwVRQET7/yEzwVn5eJMijAt7iu8lVcJbM4TTciWRV5Wcm5mNdlkl2LJEuqJE6Tf0CL -PIvE06/HIDjLBMw8TWQpbmWpAK4S+UJcbKplnolhXeCcX0Tfxi9HY6FmZVJU0KDUOANFlnEVZFLO -AU1oWSsgZVLJfVXIWbJIZrbhOq/TuSjSeCbF3//OU6OmYRiofCXXS1lKkQEyAFMCrALxgK9JKWb5 -XEZCvJGzGAfg5w2xAoY2xjVTSMYsF2meXcOcMjmTSsXlRgyndUWACGUxzwGnBDCokjQN1nl5o0aw -pLQea3gkYmYPfzLMHjBPHL/LOYDjxyz4JUvuxgwbuAfBVUtmm1IukjsRI1j4Ke/kbKKfDZOFmCeL -BdAgq0bYJGAElEiT6UFBy/G9XqHXB4SV5coYxpCIMjfml9QjCs4qEacK2LYukEaKMH8np0mcATWy -WxgOIAJJg75x5omq7Dg0O5EDgBLXsQIpWSkxXMVJBsz6UzwjtP+aZPN8rUZEAVgtJX6rVeXOf9hD -AGjtEGAc4GKZ1ayzNLmR6WYECHwG7Eup6rRCgZgnpZxVeZlIRQAAtY2Qd4D0WMSl1CRkzjRyOyb6 -E02SDBcWBQwFHl8iSRbJdV2ShIlFApwLXPH+48/i3embs5MPmscMMJbZ6xXgDFBooR2cYABxUKvy -IM1BoKPgHP+IeD5HIbvG8QGvpsHBvSsdDGHuRdTu4yw4kF0vrh4G5liBMqGxAur339BlrJZAn/+5 -Z72D4GQbVWji/G29zEEms3glxTJm/kLOCL7XcF5HRbV8BdygEE4FpFK4OIhggvCAJC7NhnkmRQEs -liaZHAVAoSm19VcRWOFDnu3TWrc4ASCUQQYvnWcjGjGTMNEurFeoL0zjDc1MNwnsOq/ykhQH8H82 -I12UxtkN4aiIofjbVF4nWYYIIS8E4V5IA6ubBDhxHolzakV6wTQSIWsvbokiUQMvIdMBT8q7eFWk -cszii7p1txqhwWQlzFqnzHHQsiL1SqvWTLWX9w6jLy2uIzSrZSkBeD31hG6R52MxBZ1N2BTxisWr -WufEOUGPPFEn5AlqCX3xO1D0RKl6Je1L5BXQLMRQwSJP03wNJDsKAiH2sJExyj5zwlt4B/8CXPw3 -ldVsGQTOSBawBoXIbwOFQMAkyExztUbC4zbNym0lk2SsKfJyLksa6mHEPmDEH9gY5xp8yCtt1Hi6 -uMr5KqlQJU21yUzY4mVhxfrxFc8bpgGWWxHNTNOGTiucXlos46k0LslULlAS9CK9sssOYwY9Y5It -rsSKrQy8A7LIhC1Iv2JBpbOoJDkBAIOFL86Sok6pkUIGEzEMtCoI/ipGk55rZwnYm81ygAqJzfcM -7A/g9g8Qo/UyAfrMAAJoGNRSsHzTpCrRQWj0UeAbfdOfxwdOPVto28RDLuIk1VY+zoIzenhaliS+ -M1lgr7EmhoIZZhW6dtcZ0BHFfDAYBIFxhzbKfM1VUJWbI2AFYcaZTKZ1goZvMkFTr3+ogEcRzsBe -N9vOwgMNYTp9ACo5XRZlvsLXdm6fQJnAWNgj2BMXpGUkO8geJ75C8rkqvTBN0XY77CxQDwUXP5++ -P/ty+kkci8tGpY3b+uwKxjzNYmBrsgjAVK1hG10GLVHxJaj7xHsw78QUYM+oN4mvjKsaeBdQ/1zW -9BqmMfNeBqcfTt6cn05++XT68+TT2edTQBDsjAz2aMpoHmtwGFUEwgFcOVeRtq9Bpwc9eHPyyT4I 
-JomafPcNsBs8GV7LCpi4HMKMxyJcxXcKGDQcU9MR4thpABY8HI3Ea3H49OnLQ4JWbIoNAAOz6zTF -hxNt0SdJtsjDETX+jV36Y1ZS2n+7PPrmShwfi/C3+DYOA/ChmqbMEj+ROH3eFBK6VvBnmKtREMzl -AkTvRqKADp+SXzziDrAk0DLXdvq3PMnMe+ZKdwjSH0PqAThMJrM0VgobTyYhEIE69HygQ8TONUrd -EDoWG7frSKOCn1LCwmbYZYz/9KAYT6kfosEoul1MIxDX1SxWklvR9KHfZII6azIZ6gFBmEliwOFi -NRQK0wR1VpmAX0uchzpsqvIUfyJ81AIkgLi1Qi2Ji6S3TtFtnNZSDZ1JARGHwxYZUdEmivgRXJQh -WOJm6UajNjUNz0AzIF+agxYtW5TDzx74O6CuzCYON3q892KaIab/wTsNwgFczhDVvVItKKwdxcXp -hXj5/HAf3RnYc84tdbzmaKGTrJb24QJWy8gDI8y9jLy4dFmgnsWnR7thriK7Ml1WWOglLuUqv5Vz -wBYZ2Fll8TO9gZ05zGMWwyqCXid/gFWo8Rtj3Ify7EFa0HcA6q0Iill/s/R7HAyQmQJFxBtrIrXe -9bMpLMr8NkFnY7rRL8FWgrJEi2kcm8BZOI/J0CSChgAvOENKrWUI6rCs2WElvBEk2ot5o1gjAneO -mvqKvt5k+Tqb8E74GJXucGRZFwVLMy82aJZgT7wHKwRI5rCxa4jGUMDlFyhb+4A8TB+mC5SlvQUA -AkOvaLvmwDJbPZoi7xpxWIQxeiVIeEuJ/sKtGYK2WoYYDiR6G9kHRksgJJicVXBWNWgmQ1kzzWBg -hyQ+151HvAX1AbSoGIHZHGpo3MjQ7/IIlLM4d5WS0w8t8pcvX5ht1JLiK4jYFCeNLsSCjGVUbMCw -JqATjEfG0RpigzU4twCmVpo1xf4nkRfsjcF6XmjZBj8AdndVVRwdHKzX60hHF/Ly+kAtDr7983ff -/fk568T5nPgHpuNIiw61RQf0Dj3a6HtjgV6blWvxY5L53EiwhpK8MnJFEb8f6mSei6P9kdWfyMWN -mcZ/jSsDCmRiBmUqA20HDUZP1P6T6KUaiCdknW3b4Yj9Em1SrRXzrS70qHLwBMBvmeU1muqGE5R4 -BtYNduhzOa2vQzu4ZyPND5gqyunQ8sD+iyvEwOcMw1fGFE9QSxBboMV3SP8zs01M3pHWEEheNFGd -3fOmX4sZ4s4fLu/W13SExswwUcgdKBF+kwcLoG3clRz8aNcW7Z7j2pqPZwiMpQ8M82rHcoiCQ7jg -WoxdqXO4Gj1ekKY1q2ZQMK5qBAUNTuKUqa3BkY0MESR6N2azzwurWwCdWpFDEx8wqwAt3HE61q7N -Co4nhDxwLF7QEwku8lHn3XNe2jpNKaDT4lGPKgzYW2i00znw5dAAGItB+cuAW5ptysfWovAa9ADL -OQaEDLboMBO+cX3Awd6gh506Vn9bb6ZxHwhcpCHHoh4EnVA+5hFKBdJUDP2e21jcErc72E6LQ0xl -lolEWm0Rrrby6BWqnYZpkWSoe51FimZpDl6x1YrESM1731mgfRA+7jNmWgI1GRpyOI2OydvzBDDU -7TB8dl1joMGNwyBGq0SRdUMyLeEfcCsovkHBKKAlQbNgHipl/sT+AJmz89VftrCHJTQyhNt0mxvS -sRgajnm/J5CMOhoDUpABCbvCSK4jq4MUOMxZIE+44bXcKt0EI1IgZ44FITUDuNNLb4ODTyI8ASEJ -Rch3lZKFeCYGsHxtUX2Y7v5DudQEIYZOA3IVdPTi2I1sOFGN41aUw2doP75BZyVFDhw8BZfHDfS7 -bG6Y1gZdwFn3FbdFCjQyxWEGIxfVK0MYN5j8p2OnRUMsM4hhKG8g70jHjDQK7HJr0LDgBoy35u2x 
-9GM3YoF9h2GuDuXqDvZ/YZmoWa5Cipm0YxfuR3NFlzYW2/NkOoA/3gIMRlceJJnq+AVGWf6JQUIP -etgH3ZsshkXmcblOspAUmKbfsb80HTwsKT0jd/CJtlMHMFGMeB68L0FA6OjzAMQJNQHsymWotNvf -BbtzigMLl7sPPLf58ujlVZe4420RHvvpX6rTu6qMFa5WyovGQoGr1TXgqHRhcnG20YeX+nAbtwll -rmAXKT5++iKQEBzXXcebx029YXjE5t45eR+DOui1e8nVmh2xCyCCWhEZ5SB8PEc+HNnHTm7HxB4B -5FEMs2NRDCTNJ/8MnF0LBWPszzcZxtHaKgM/8Pq7byY9kVEXye++GdwzSosYfWI/bHmCdmROKtg1 -21LGKbkaTh8KKmYN69g2xYj1OW3/NI9d9ficGi0b++5vgR8DBUPqEnyE5+OGbN2p4sd3p7bC03Zq -B7DObtV89mgRYG+fT3+DHbLSQbXbOEnpXAEmv7+PytVs7jle0a89PEg7FYxDgr79l7p8DtwQcjRh -1J2OdsZOTMC5ZxdsPkWsuqjs6RyC5gjMywtwjz+7ULUFM4z7nI8XDntUkzfjPmfia9Qqfv4QDWSB -eTQY9JF9Kzv+f8zy+b9mkg+cijm5/gOt4SMB/VEzYePB0LTx8GH1L7trdw2wB5inLW7nDrewOzSf -VS6Mc8cqSYmnqLueijWlK1BsFU+KAMqc/b4eOLiM+tD7bV2WfHRNKrCQ5T4ex44FZmoZz6/XxOyJ -gw+yQkxssxnFqp28nrxPjYQ6+mxnEjb7hn45W+YmZiWz26SEvqBwh+GPH386DftNCMZxodPDrcjD -/QaE+wimDTVxwsf0YQo9pss/L1XtrYtPUJMRYCLCmmy99sEPBJs4Qv8a3BMR8g5s+Zgdd+izpZzd -TCSlDiCbYlcnKP4WXyMmNqPAz/9S8YKS2GAms7RGWrHjjdmHizqb0flIJcG/0qnCmDpECQEc/luk -8bUYUuc5hp40N1J06jYutfdZlDkmp4o6mR9cJ3Mhf6/jFLf1crEAXPDwSr+KeHiKQIl3nNPASYtK -zuoyqTZAgljl+uyP0h+chtMNT3ToIcnHPExATIg4Ep9w2vieCTc35DLBAf/EAyeJ+27s4CQrRPQc -3mf5BEedUI7vmJHqnsvT46A9Qg4ABgAU5j8Y6cid/0bSK/eAkdbcJSpqSY+UbqQhJ2cMoQxHGOng -3/TTZ0SXt7Zgeb0dy+vdWF63sbzuxfLax/J6N5auSODC2qCVkYS+wFX7WKM338aNOfEwp/Fsye0w -9xNzPAGiKMwG28gUp0B7kS0+3yMgpLadA2d62OTPJJxUWuYcAtcgkfvxEEtv5k3yutOZsnF0Z56K -cWe35RD5fQ+iiFLFptSd5W0eV3HkycV1mk9BbC264wbAWLTTiThWmt1OphzdbVmqwcV/ff7x4wds -jqAGJr2BuuEiomHBqQyfxuW16kpTs/krgB2ppZ+IQ900wL0HRtZ4lD3+5x1leCDjiDVlKOSiAA+A -srpsMzf3KQxbz3WSlH7OTM6HTcdikFWDZlJbiHRycfHu5PPJgEJ+g/8duAJjaOtLh4uPaWEbdP03 -t7mlOPYBodaxrcb4uXPyaN1wxP021oDt+PCtB4cPMdi9YQJ/lv9SSsGSAKEiHfx9DKEevAf6qm1C -hz6GETvJf+7JGjsr9p0je46L4oh+37FDewD/sBP3GBMggHahhmZn0GymWkrfmtcdFHWAPtDX++ot -WHvr1d7J+BS1k+hxAB3K2mbb3T/vnIaNnpLVm9Mfzj6cn725OPn8o+MCoiv38dPBoTj96Yug/BA0 -YOwTxZgaUWEmEhgWt9BJzHP4r8bIz7yuOEgMvd6dn+uTmhWWumDuM9qcCJ5zGpOFxkEzjkLbhzr/ 
-CDFK9QbJqSmidB2qOcL90orrWVSu86OpVGmKzmqtt166VszUlNG5dgTSB41dUjAITjGDV5TFXpld -YckngLrOqgcpbaNtYkhKQcFOuoBz/mVOV7xAKXWGJ01nregvQxfX8CpSRZrATu5VaGVJd8P0mIZx -9EN7wM149WlApzuMrBvyrLdigVbrVchz0/1HDaP9XgOGDYO9g3lnktJDKAMbk9tEiI34JCeUd/DV -Lr1eAwULhgd9FS6iYboEZh/D5losE9hAAE8uwfriPgEgtFbCPxA4cqIDMsfsjPDtar7/l1ATxG/9 -6689zasy3f+bKGAXJDiVKOwhptv4HWx8IhmJ04/vRyEjR6m54i81lgeAQ0IBUEfaKX+JT9AnQyXT -hc4v8fUBvtB+Ar1udS9lUeru/a5xiBLwRA3Ja3iiDP1CTPeysMc4lVELNFY+WMywgtBNQzCfPfFp -KdNU57ufvTs/Bd8RizFQgvjc7RSG43gJHqHr5DuucGyBwgN2eF0iG5fowlKSxTzymvUGrVHkqLeX -l2HXiQLD3V6dKHAZJ8pFe4jTZlimnCBCVoa1MMvKrN1qgxR22xDFUWaYJSYXJSWw+jwBvExPY94S -wV4JSz1MBJ5PkZOsMhmLaTIDPQoqFxTqGIQEiYv1jMR5ecYx8LxUpgwKHhabMrleVni6AZ0jKsHA -5j+dfDk/+0BlCYcvG6+7hznHtBMYcxLJMaYIYrQDvrhpf8hVk0kfz+pXCAO1D/xpv+LslGMeoNOP -A4v4p/2K69COnZ0gzwAUVF20xQM3AE63PrlpZIFxtftg/LgpgA1mPhiKRWLZi070cOfX5UTbsmVK -KO5jXj7iAGdR2JQ03dlNSWt/9BwXBZ5zzYf9jeBtn2yZzxS63nTebEt+cz8dKcSSWMCo29ofw2SH -dZrq6TjMto1baFurbeyvmRMrddrNMhRlIOLQ7TxymaxfCevmzIFeGnUHmPheo2sksVeVD37NBtrD -8DCxxO7sU0xHKmMhI4CRDKlrf2rwodAigAKh7N+hI7nj0dNDb46ONbh/jlp3gW38ERShzsWlGo+8 -BE6EL7+z48ivCC3Uo0cidDyVTGa5zRPDz3qJXuULf469MkBBTBS7Ms6u5ZBhjQ3MZz6xt4RgSdt6 -pL5MrvoMizgD5/RuC4d35aL/4MSg1mKETrsbuWmrI5882KC3FGQnwXzwZbwG3V/U1ZBXcss5dG8t -3Xao90PE7ENoqk/fhyGGY34Pt6xPA7iXGhoWeni/bzmF5bUxjqy1j62qptC+0B7srIStWaXoWMYp -TjS+qPUCGoN73Jj8gX2qE4Xs7546MScmZIHy4C5Ib24D3aAVThhwuRJXjiaUDt9U0+h3c3krUzAa -YGSHWO3wm612GEU2nNKbB/bV2F1sLjb9uNGbBrMjU46BnpkqYP2iTFYHiE5vxGcXZg0yuNS/6i1J -nN2Ql/z2r2dj8fbDz/DvG/kRTCkWP47F3wAN8TYvYX/J1bt0rQJWclS8ccxrhRWSBI2OKvgGCnTb -Ljw647GILjHxa0usphSYVVuu+NoTQJEnSBXtjZ9gCifgt6nsanmjxlPsW5SBfok02F7sggUiB7pl -tKxWKdoLJ0rSrObl4Pzs7emHT6dRdYccbn4OnCiKn5CF09FnxCWeh42FfTKr8cmV4zj/KNOix2/W -m05TOIObThHCvqSwG02+UiO2m4u4xMiBKDbzfBZhS2B5rtWr1uBIj5z95b2G3rOyCGs40qdojTeP -j4Ea4te2IhpAQ+qj50Q9CaF4ikVj/Dga9JvisaDQNvx5erOeu5FxXf1DE2xj2sx66He3unDJdNbw -LCcRXsd2GUxBaJrEajWduYWCHzOhb0QBLUfnHHIR12klZAaSS5t8upoCNL1b28cSwqzC5owK3ihM 
-k67jjXKSkGIlBjjqgKrr8UCGIoawB/8pvmF7gEWHouZaaIBOiNL+KXe6qnq2ZAnmLRFRryfxYJ1k -L918Hk1hHpR3yLPGkYV5otvIGF3LSs+fHwxHly+aTAeKSs+8yt5ZAVbPZZM9UJ3F06dPB+Lf7/d+ -GJUozfMbcMsAdq/Xck6vt1huPTm7Wl3P3ryJgB9nS3kJD64oem6f1xmFJnd0pQWR9q+BEeLahJYZ -TfuWXeagXckHzdyCD6y05fglS+jeIwwtSVS2+vooDDsZaSKWBMUQxmqWJCGHKWA9NnmNRXkYZtT8 -Iu+A4xMEM8a3eELGW+0lepiUQGu5x6JzLAYEeEC5ZTwaVTVTWRrgObnYaDQnZ1lSNfUkz93DU30X -QGWvM9J8JeI1SoaZR4sYTn2nx6qNh53vZFFvx5LPLt2AY2uW/Po+3IG1QdLyxcJgCg/NIs1yWc6M -OcUVS2ZJ5YAx7RAOd6ZbnMj6REEPSgNQ72QV5lai7ds/2XVxMf1I58j7ZiSdPlTZm7E4OBRnrQTD -KGrGpzCUJaTlW/NlBKN8oLC29gS8scSfdFAViwm8CzzcusY60xdzcP5Gc1sHwKHLoKyCtOzo6Qjn -BjILn5l2y3Ua+KEtOuF2m5RVHacTff/DBB22iT1Y13jaeridlZ7WWwEnPwcPeF+n7oPjYLJskJ6Y -emtKM47FQocoIrfEzK/GKnL08g7ZVwKfAikzn5jCaBNEurTsaitOdc6mo+IR1DNTxbTFMzflM53K -ExfzMeU5mbqHLV60waV9kYV4fSyGL8bi29ZGaFZs8GInQPnJPHoyD32fjLpeHh02dqa78WxB2Ark -5dWjp5smU5pe2Jdzfn9fnXSIG8AVyM4ikfP9JwqxY5y/FqqG0sxrO6fQjLEkfc9mPelq7KZGhUrR -puDVrxuF4qgW43/aQUyZt9YDXBGLQssWyFbxm8STVvKfvbcNEwM1ev7Koucy6Tucwm94Wwq81wR1 -HZ2th5Y6rd6C7dmT69pJPoJqGjYcf69H9ShRaueId1rh8WQjcS7rP4KHQ7pZhpjmWetY+F/JPJy0 -v+1wsYPld9/swtNVML1lEj0Lurt2gZe6XbDQLLf59Ie6PEbp6/pVAuNAaUQHvD5z+SP5a0eYD8y3 -uuQ2L3iF1yvSWS/allS6/gfvSfkeLXQIaBNO6VmwFuCS1As8mr2l2yJPFKWR4aUv3xy+GJtaWwak -J/AyevlMX6pI3cx1Ar6zOtabIHip+x1G/+YASyq/t33V2RbQtI5btyv5g4UUjxpFE0uHxnLcX1nR -rFks8BbChpjspNorNd6D2zAFh8FcJ5qD5wM7u6gPXVdjNNK7TbVtEeCtwUP72SY5D+raKFJEepew -bVOeuxTno0VB9+q3ILgXR85fxvwGfaq6OLKxKmNT8Cxx6OZH4qe66a3kYnuCxrW6CXdNn/vvmrtu -EdiZm/SAztz9ik2XBrrvdivaRwOOE2hCPKjooNH4/cbEtQNjnZXSH/PWHyS/2wlnusWs3AfG5MBg -BJ3YU2NvzP4qnrnfMcVqn684dgt0e52N1rQ7NqPN8Q/xFDidBJ/bmn3KEZprDuSNB91ZN+Gs04m8 -vlaTGO9LnNBulTKkOtsQs/95T9fdyVhtzLYFrwECEIabdC6rm64OjAG6ku9t5gQj574XQUNTGq6T -16uSOZsEvUcCcBGHHqm/CW1zYu4glRgxVnVZlLCtHOjbfTnzpS9ZuAFqImGrWN0Y1E2Psb7slRQr -pVuZol4OeLbSZoAIbMQ7pmEyse+AV543FxckY8sMMqtXsoyr5tIe/4w9Ea+dEaiMGxfXiXM1Utni -EhexxPKGgxRGmuz3Z7BD83anO24qGFlt93B2oh46dvqYSxAcY2S4OLmzF/a5F0XN6bJo1zu0zRqu 
-s5cUwTKY2+dIR+qgE7/VN2Lxra0cEkf/0uEfkHe3ltHP67bqjL1bi4bzzFUI3SuQsAafjHPfzYYd -DujeYdjaodrxfX1hGaXjYW5pbKmoffJehdOMNmpCMZiCeU8oxk+zf2QoxoP/wFCMvocSDI3GR+uB -3sT7e2I2rB7cSx0bRoA+EyASHgm3rgQ0pnLoprEXuUruBvaKZtaVTm2cMQ/Ikd3bvggEX96o3Jxf -73K1XaEYX7ro8Q/nH9+cnBMtJhcnb//z5AdKc8Jzh5atenCsKsv3mdr7XkK1G7fSqSl9gzfY9ty5 -ylVBGkLnfedUvwdCfwVY34K2FZn7eluHTiVNtxMgvnvaLajbVHYv5I5fpqs23ISUVuZzoJ9ymqr5 -5Zz1m0fmyIvFoTnSMu+bUwgto50g7baFcxJGu+pE+6v6Xs0tAeSRTVumFcDDB+Qve/ZgalBshJsd -lPb/OINyrbF+z9xJA1I4k87diHQtIoOq/P9DRwnKLsa9HTuKY3vbNbXjcxZlr3HHQ9SZjAxBvAK6 -QXd+rrDPZbqFCkHACk/f/MeIGP2nTybtOf4TJS73qVR3H5XNlf2Fa6ad278meFpf2Ru0FKf88Hkl -NF7UqXsCb/t0OpDTR8c6+cKpDQHNdwB0bsRTAXujv8QKcboRIWwctUuG6aZER339nYM82k0He0Or -52J/WyGnW8goxIvtDeetWknd45B7qHt6qNqUyzkWGPMet1VoitcEmc8FBV2Z5TkfeBitt/3w9fby -xZGN0iO/42tHkVB+1sAx7JdOfuPOaxqd7sQs5ZgS4HCv5tT36hZXDlT2CbbtbTpFHlv2PyZhgCEN -vPf9ITPTw7vMftDG1LLeEUxJDJ+oEU3LKYvRuNsno+50G7XVBcIlPg8A0lGBAAvBdHSjk3K54bzp -4XO9G5zWdMGte1QTOlJB6Vc+R3AP4/s1+LW7U2nug7oziqY/N2hzoF5yEG72HbjVyAuFbDcJ7ak3 -fLDFBeAq5/7+Lx7Qv5sYaLsf7vKrbauXvZV17MtiLimm2LRIZB5HYGRAbw5JW2MBghF0vNiloaPL -UM3ckC/Q8aP8VLy+mjYY5MxOtAdgjULwf2RtvCc= -""") - -##file ez_setup.py -EZ_SETUP_PY = convert(""" -eJzNWmmP20YS/a5fwSgYSIJlDu9DhrzIJg5gIMgGuYCFPavpc8SYIhWS8li7yH/f181DJDWcJIt8 -WAbOzJDN6qpXVa+qWvr8s+O52ufZbD6f/z3Pq7IqyNEoRXU6VnmelkaSlRVJU1IlWDR7K41zfjIe -SVYZVW6cSjFcq54WxpGwD+RBLMr6oXk8r41fTmWFBSw9cWFU+6ScySQV6pVqDyHkIAyeFIJVeXE2 -HpNqbyTV2iAZNwjn+gW1oVpb5Ucjl/VOrfzNZjYzcMkiPxji3zt930gOx7yolJa7i5Z63fDWcnVl -WSF+PUEdgxjlUbBEJsz4KIoSIKi9L6+u1e9YxfPHLM0Jnx2SosiLtZEXGh2SGSStRJGRSnSLLpau -9aYMq3hulLlBz0Z5Oh7Tc5I9zJSx5Hgs8mORqNfzo3KCxuH+fmzB/b05m/2oYNK4Mr2xkiiM4oTf -S2UKK5KjNq/xqtby+FAQ3vejqYJh1oBXnsvZV2++/uKnb37c/fzm+x/e/uNbY2vMLTNgtj3vHv30 -/TcKV/VoX1XHze3t8XxMzDq4zLx4uG2Cory9KW/xX7fb7dy4UbuYDb7vNu7dbHbg/o6TikDgf7TH -Fpc3XmJzar88nh3TNcXDw2JjLKLIcRiRsWU7vsUjL6JxHNBQOj4LRMDIYv2MFK+VQsOYRMSzXOH5 -liMpjXwhXGnHnh26PqMTUpyhLn7gh6Ef84gEPJLM86zQIjG3Qid0eBw/L6XTxYMBJOJ2EHOHiiCw 
-JXEdEgjfEZ6MnCmL3KEulLo2syQL3TgmgeuHcRz6jPBY+sQK7OhZKZ0ubkQihrs8EIw7juOF0g5j -GXISBLEkbEKKN9QlcCzPJ44nuCdsQVkYSmG5MSGeCGQo/GelXHBh1CF25EOPiBMmJXW4DX0sl7rU -Zt7TUtgoXqgrHer7bswD+DWUoUd4GNsOBJHYiiYsYuN4gT1ccCAZhNzhjpTC9iwrdgNPOsSb8DSz -raEyDHA4hPrcJZbjB54fwD/MdiPLIqEVW8+L6bTxQ44X4aOYRlYYOsyPie+SyHNd4nM+iUwtxm/F -cOEFhEXAMg5ZFPt+6AhfRD7CUdCIhc+LCTptIoFMIkJaAQBymAg824M0B0YC8Alvg1SG2DiUCIIc -tl2O95FGTiRCSnzqE2jExfNiLp7igRvLmFoQ5jHP8eLQcj0umCOYxZxJT9lDbAKPxZ50qQxJiCh0 -BYtcYVEH7g69mDrPi+mwoZLEjm1ZlMNNHDkBSYJzF44PPCsKJsSMeEZaVuBRGRDi0JBbUAvIeghs -K7JD5kw5asQzgR3YsSMEc33phQJeswPGA2I7kOqEU1JGPCPtCAQF8uUSoUIcP2YxpEibhzSM5ARb -sRHPCEvw0Asih8VxRCUNgXRkIXot+Dy0p5ztDp1EqJB2IDmHYb7v217k2SwEf/E4igN/SsqIrahF -Y9u1CSPUdSyAAZ4LpecxH0QR2vJZKZ1FCBKJPQPuSSpdZBSVsRcwC1CB9cRUwHhDiyLF1iB+12Gc -xix0KJMe6MsJpBMROcVW/tAiIWLJIwvqICERsdIV4HQ/BGHwyA6mPO0PLSISXMUlqoodWrYQADdE -cfIpQ8EjwRTL+CMfRdyVAQjBY4yQKLQ9BA53Q8oYd7nPJ6QEQ4uQMBGqfGTbASpRFHmhAxGomL4X -I7WniDMYVTfmB0T6IQW+6B6QDYEFQzzPRYL5ZIobgqFF1JERCX0HxR60S10UaQuu5sKXaCV8d0JK -OKI7Cz6SMeHMJYHtC9+2faQhWooIFDgZL+GoEpBIxr6HKsDB5ZakQcikLR24AY+cqQwIhxZ5qLEE -fCvRMiABPdezbVtyEbk2/oVTukSjbshSvZATA5GYo36oEASBR66lGivreSmdRYwSNwI3oOfwIpdZ -KmYRbQCbobJMloFoaJEdOnYIkoOjY85s3/Jji/gRdQXyPPanPB0PLYLuzLPQzNgKYerFgfCYpMKK -YCuzpjwdj5gBQYbGDrXVjSIegJ2IEFYA8mKB6031d42UziIp4FpX+MQOqe0wuIn5nk1D1F5UfjFV -SeJhPWIEaWNLxZrEERzEZMcuKltI/dhBjwMpv816EwHGm3JWFedNPXDtSblPE9rOW+jdZ+ITExg1 -3uo7b9RI1KzFw/66GRfS2H0kaYJuX+xwawmddhnmwbWhBoDVRhuQSKO9r2bGdjyoH6qLJ5gtKowL -SoR+0dyLT/VdzHftMshpVn627aS8a0XfXeSpC3MXpsHXr9V0UlZcFJjrloMV6porkxoLmvnwBlMY -wRjGPzOM5Xd5WSY07Y1/GOnw9+Fvq/mVsJvOzMGj1eAvpY/4lFRLp75fwLlFpuGqAR0Nh3pRM15t -R8PculNrR0kptr2Bbo1JcYdRdZuXJjsV+K0Opu4FLlJy3tr+rHESxsYvTlV+AA4M0+UZo2jGbzuz -eycFaq4/kA/wJYbnj4CKKIAAnjLtSKp9Pc7fN0rfG+U+P6VcTbOkxrovrZ3Ms9OBisKo9qQyMAh3 -grUsNQFnCl1DYurtlDplXL8ijPsBEPeGGmmXj/uE7dvdBbRWRxO1PGNxu1iZULJG6V5tqeT0jjH2 -ohgckDwmmLnpJRIEXyMi6wDXKmc58EgLQfj5oj72eCt76mnY9XbN2YQWUzVaamlUaFUaQPSJBcsz 
-XtbYtGocCQJFgQpEVFolVQLXZQ+984za4439eSb0eUJ9NsJrvQBqnioMnzwfUVo2hw2iEabPcor8 -hJ1ErUqdZ8Q4iLIkD6I+4Lgk3f29jpeCJKUwfjiXlTi8+aTwympHZAapcK8+2SBUUYsyXoWgMqY+ -9TDbCNU/H0m5q1kI9m+NxfHDw64QZX4qmCgXimHU9oecn1JRqlOSHoGOH9c5gazjiIMGtuXqwiQq -5LaXpOnlZYPYKAXbtFuPEu3CAW2SmEBWFNXSWqtNeiTXEHW306v+6Q5tj/l2jWN2mpi3SkbtIBD7 -WNYAIP3wCYbvXmoJqQ9I8+h6h4Foswmu5fyi8evt/EUD1epVI7uvwlDAz/XKL/NMpgmrAM2mz/59 -z/9Ztp//uL9E/0S8L19vb8pVl8ttDuujzPfZkPDnjGSLSqVUlyLgDHV8p3OkOa5T2XLKMoSyaXyX -CkRIu/xKnsohlcogIAFbWg1lUpQA4lSqdFhAwrl1vfHyp57yC3Mk7332Plt+eSoKSAOd1wJuilHd -WqFqXWJZmKR4KN9Zd8/XrCd991WCwEzoSdXRb/Pq6xzs3AsUUpazJtvS4ZvrfkK+G6XznXrlc4Ci -CT//MKiZ/RCti+dTmfpXV1CVz8i4Qen86ok6qTOTXHjeSHNWdxmaEWsbkqo+9NVdw/9p3axZVx3r -t3Xz98qmuqd2va6ZNZXfX8rgRKnL6wLX1jdVJ1h1IunFiKZuDGtD+6lBgfJBHUTWHvGY1kHbtqBb -o8dPL29KtNM3peqm5/1cGJ1q14EPuf1yoDAzXgy7vpJ8FNB+iy675vlf8iRbtlWhXVqLKwumxOnW -91sU6LZbVuzTvo68K6tyWYtdbVQyfPExT1QAHQVRJbBVp+ySbUDR6tKhyCFIoVG2KKX5w2CV6q+V -X4bvqgsrzUdSZEuF88u/7qo/9Gi4siHn8qkov9EhoT4MWYqPIlN/wJwjlJ3tRXpUrdzbOtp67UQX -Kug3VPyrj2uWCooZWH5tgKpm6tYB6ZwJAIlXkIeqmQXpikdFsQQTalnqt/u0rknZnDVbgo2btuWy -I1TmbTSbs9kSjCg2CmEt5kDYXnVQPBd1rdnDvVCiesyLD82ma+NYF4ycVqT5qE0xhWaJG5CpYhEg -wHQjrhdA8iUTm8wpRFOA+gaYq7/SiwiK9VXI9Ej3qkfSUbZW2XT1GpoEHaxVoobFphdKhTi+qn8s -R+3UMDpbGtalrpzrLUalTKdcww8mfuZHkS2vln1ufI8+/vaxSCqQD3wMfHUHDQ7/sFaf9j0q76kO -gBUqDUGNLC+Kkw6OVIyEab/3w0M11pXQ61tObK/mk7OpuRoGmGrGWK6GGtcsoq2puWI9f6RzwIkH -prajnqy7lzDfqTlvM6YAbLDRu7A0L8VydUURZbXRQvvPm2rWkhYUTNUvLW3N/sil6vcBkb5ED/Jx -PVWxLzX37XOfg+oa+wbdUrOqLRBP9cejz5efa47reaDj6iuJlzXPzwx6+Lauu6zhZDAYDLTPVGr0 -xgGWHw4w1By0he0JDWlmrPZqfKQhTlELNM6rF+oA5W6lw/RRLAod1sJQZfx3Q0VZqnAe1Sql9nUN -waJThqHuw7IzS6TlsMHvmbbbNWjtdsYWU55lWqa9+NNd/z9B8Jpc1ahLyzwVyNWJabft41FM6l79 -qkcvxCH/qPlWe6L+GoMealE5KlBv+ju8O2q+J7vsJql+HTYrvWGq3+1cz3d/YEbDz2ea+dEgtpmO -9v85JJ9Ls07w70q5iuan8q5Nt7vhGK7BtlYIfFilqj8cx3SkqCdPR6ja5S8CoFNfa37BZbCldqAO -8/kPV23RfN0yyhwk+KALUaFOdBGEaJIuAT1/Qt5i+T3aqXn7hRvzeB4OlPP6qzTX3zYxV4vmpPLY 
-1ad2hCkv9PyTfmqoFKGnJK1e1ke/EPmgJsWzYuR+FBfN/KN6rfaouBN7AUT33JfuWv2pViwvXbUW -0tZCXTQXBV1cnnUnx+rdu+bUWbZF9cmTZ9kVu3oErEv0u7n646bY4N8aXIHxoek064as3chE8T2U -y9Vd97JZwuKudB7VUDGf15NCXaT7wMADGCGrdmLQXxHatnfNB1HVSavuL/uT9E53DLtdE/UdJI2M -taFhedW0RC0Ar8bGHkiFaXALPc1SkILtl/P3Wf8rPu+z5bt//Xb3YvXbXLcnq/4Yo9/ucdETjI1C -rr9klRpCscBn8+skbRmxVhX/f7fRgk3dei/t1R3GMA3kC/20fojRFY82d0+bv3hsYkI27VGneg+A -GcxocdxuF7udStjdbtF9sJEqiVBT5/BrR5fD9u939h3eefkSYNWp0itfvdzpljubu6fqouaIi0y1 -qL7+C1AkCcw= -""") - -##file distribute_from_egg.py -DISTRIBUTE_FROM_EGG_PY = convert(""" -eJw9j8tqAzEMRfcG/4MgmxQyptkGusonZBmGoGTUGYFfWPKE6dfXTkM3gqt7rh47OKP3NMF3SQFW -LlrRU1zhybpAxoKBlIqcrNnBdRjQP3GTocYfzmNrrCPQPN9iwzpxSQfQhWBi0cL3qtRtYIG/4Mv0 -KApY5hooqrOGQ05FQTaxptF9Fnx16Rq0XofjaE1XGXVxHIWK7j8P8EY/rHndLqQ1a0pe3COFgHFy -hLLdWkDbi/DeEpCjNb3u/zccT2Ob8gtnwVyI -""") - -##file distribute_setup.py -DISTRIBUTE_SETUP_PY = convert(""" -eJztPGtz2ziS3/UrcHK5SOUkxs7MzV25TlOVmTizrs0mKdvZ/ZC4aIiEJI75GpC0ov311403SEp2 -LrMfruq8O7ZENBqNfncDzMm/1ft2W5WT6XT6S1W1TctpTdIM/marrmUkK5uW5jltMwCaXK3JvurI -jpYtaSvSNYw0rO3qtqryBmBxlJOaJg90w4JGDkb1fk5+75oWAJK8Sxlpt1kzWWc5oocvgIQWDFbl -LGkrvie7rN2SrJ0TWqaEpqmYgAsibFvVpFrLlTT+i4vJhMDPmleFQ30sxklW1BVvkdrYUivg/Ufh -bLBDzv7ogCxCSVOzJFtnCXlkvAFmIA126hw/A1Ra7cq8oumkyDiv+JxUXHCJloTmLeMlBZ5qILvj -uVg0Aai0Ik1FVnvSdHWd77NyM8FN07rmVc0znF7VKAzBj/v7/g7u76PJ5BbZJfibiIURIyO8g88N -biXhWS22p6QrqKw3nKauPCNUioliXtXoT822a7PcfNubgTYrmP68LgvaJlszxIoa6THfKXe/wo5q -yhs2mRgB4hqNllxebSaTlu8vrJCbDJVTDn+6ubyOb65uLyfsa8JgZ1fi+SVKQE4xEGRJ3lclc7Dp -fXQr4HDCmkZqUsrWJJa2ESdFGr6gfNPM5BT8wa+ALIT9R+wrS7qWrnI2n5F/F0MGjgM7eemgjxJg -eCiwkeWSnE0OEn0CdgCyAcmBkFOyBiFJgsir6Ic/lcgT8kdXtaBr+LgrWNkC69ewfAmqasHgEWKq -wRsAMQWSHwDMD68Cu6QmCxEy3ObMH1N4Avgf2D6MD4cdtgXT02YakFMEHMApmP6Q2vRnS4FgHXxQ -KzZ3felUTdTUFIwyhE8f43+8vrqdkx7TyAtXZm8u377+9O42/vvl9c3Vh/ew3vQs+in64cepGfp0 -/Q4fb9u2vnj5st7XWSRFFVV881L5yOZlA34sYS/Tl9ZtvZxObi5vP328/fDh3U389vVfL9/0FkrO -z6cTF+jjX3+Lr96//YDj0+mXyd9YS1Pa0sXfpbe6IOfR2eQ9uNkLx8InZvS0mdx0RUHBKshX+Jn8 
-pSrYogYKxffJ6w4o5+7nBStolssn77KElY0CfcOkfxF48QEQBBI8tKPJZCLUWLmiEFzDCv7OtW+K -ke3LcDbTRsG+QoxKhLaKcCDhxWBb1OBSgQfa30TFQ4qfwbPjOPiRaEd5GQaXFgkoxWkTzNVkCVjl -abxLARHow4a1yS5VGIzbEFBgzFuYE7pTBRQVREgnF1U1K/W2LEys9qH27E2OkrxqGIYja6GbShGL -mzaBwwCAg5FbB6Jq2m6j3wFeETbHhzmol0Pr57O72XAjEosdsAx7X+3IruIPLsc0tEOlEhqGrSGO -KzNI3hhlD2aufymr1vNogY7wsFygkMPHF65y9DyMXe8GdBgyB1huBy6N7HgFH9OOa9Vxc5vIoaOH -hTEBzdAzkwJcOFgFoavqkfUnoXJmbVJBGNWu+5UHoPyNfLjOSlh9TJ+k+lncMuRGvGg5Y0bblOGs -ugzA2WYTwn9zYuynrWIE+3+z+T9gNkKGIv6WBKQ4gugXA+HYDsJaQUh5W04dMqPFH/h7hfEG1UY8 -WuA3+MUdRH+Kksr9Sb3XusdZ0+Wtr1pAiARWTkDLAwyqaRsxbGngNIOc+uqDSJbC4Neqy1MxS/BR -Wutmg9apbCSFLamkO1T5+9yk4fGKNkxv23mcspzu1arI6L6SKPjABu7FabOo96dpBP9Hzo6mNvBz -SiwVmGaoLxAD1xVo2MjD87vZ89mjjAYINntxSoQD+z9Ea+/nAJes1j3hjgSgyCKRfPDAjLfh2ZxY -+at83C/UnKpkpctUnTLEoiBYCsOR8u4VRWrHy17S1uPA0kncRrkhd7BEA+j4CBOW5/8xB+HEa/rA -lre8Y8b3FlQ4gKaDSnIn0nmho3TVVDmaMfJiYpdwNA1A8G/ocm9Hm1hyiaGvDeqHTQwmJfLIRqTV -yN+iSrucNVjafTG7CSxX+oBDP+19cUTjrecDSOXc0oa2LQ89QDCUOHWi/mhZgLMVB8frAjHkl+x9 -EOUcbDVlIA4VWmamjM7f4y0OM89jRqT6CuHUsuTn5RTqMrXebISw/j58jCqV/7Uq13mWtP7iDPRE -1jOJ8CfhDDxKX3SuXg25j9MhFEIWFO04FN/hAGJ6K3y72FjqtkmcdlL48/IUiqisEaKmj1BCiOrq -Szkd4sPuT0LLoMVEShk7YN5tsbMhWkKqkwGfeFdifInIx5yBgEbx6W4HJUXFkdQE00JN6DrjTTsH -4wQ0o9MDQLzXTocsPjn7CqIR+C/llzL8teMcVsn3EjE55TNA7kUAFmEWi5nFUJml0LI2fOWPsbwZ -sRDQQdIzOsfCP/c8xR1OwdgselHVw6EC+1vs4VlR5JDNjOq1yXZg1fdV+7bqyvS7zfZJMsdIHKRC -xxxWnHBGW9b3VzFuTligybJExDoSqL83bImfkdilQpZyxFCkv7FtSWOvIrSa5icYX14lol4SrVnF -+ayV3caSFkxmjfeK9nvICkVytsIW6iPNMw+7Nr2yK1aMg0lTYcvGLQhc2LIUWbFo45jeKaiBmMLI -vcePe4KNlxCcRLLVq7MylZET+8qUBC+DWUTuJU/ucUWvOAAHwzjTWaSp5PQqLI3kHgUHzXS1B9EV -TqoyFf3ZmmKsX7E1+htsxSZtR3PbJRb7a7HUaiMthn9JzuCFIyHUjkMlvhKBiGFrXvXIeY5118Qx -x9Fw6aB4NTa33fwzRnXAfpSXH0dYp23+iR5QSV824rmXrqIgIRhqLDIFpI8MWHogC9egKsHkCaKD -fal+r2OuvdRZop1dIM9fP1YZanWNppsacmySM4jqpn4x1iOcfDOd45Z8ny2JUlwKB8Mn5JrR9KUI -rgQjDORnQDpZgck9zPFUYIdKiOFQ+hbQ5KTiHNyFsL4eMtit0GptLxmez7RMwGsV1j/YKcQMgSeg 
-DzTtJVWSjYJoyaw5me5W0wGQygsQmR0bOE0lCVhrJMcAAnQN34MH/CPxDhZ14W07V0gY9pILS1Ay -1tUgOOwG3Neq+hquuzJBd6a8oBh2x0XTd05evHjYzY5kxvJIwtYoarq2jDfatdzI58eS5j4s5s1Q -ao8lzEjtY1bJBtag+e/+1LRpBgP9lSJcByQ9fG4WeQYOAwuYDs+r8XRIlC9YKD0jtbET3lIAeHZO -3593WIZKebRGeKJ/Up3VMkO6jzNoVASjad04pKv1rt5qTRdkxegdQjSEOTgM8AFla4P+P0R0o8lD -Vwt/sZa5NSvlliC265C01k4AMc1UhAAXCg4vVmgBYu16kLVnncCm4YSlJsmy7gS8HyLZa66OtMNe -+xBuI1axw6qJnfURobFKiPQESDQxasTCTdiNeXsFC9wFY2FUOTzN0/EkcT3moYTSTxzxwHqu23FG -jNfCM3LNt1FpfreAFHFHhKRpGXBNUlCynY76+BQieBB9ePcmOm3wDA/PhyP8NWgrXyM6GTgxaxLt -TLlDjVH1l7Fwxq/h2KgiXz+0tBbVIyTiYHSx2/EP65wmbAtmxHSXvJchZA32OYdgPvGfygeIsd5h -AuR0ahPO3MMKusaaxvNsmOnq+xFOE3qcFKBaHbdH6m+Ic+dut+cF9iMXWHj0A4lefOCHV6AnDy5b -1n7pZTlg+6+iOnDvELjr9hgw6SnB36pHVAGWM3kAXXUtZtPolHZ0b01WV1D9TNBhzpxIy1HE9+Sp -5jt8sEFCGR4QHXuw0pq8yDSYJN2smjEnI6ezqqeu+DmIGZYXYAe07+HmxKdmVJVOAPOO5KwNGoJq -b3x6n59GzRS/UdNCtz047zUW1eEB3rvAjw73NIZj8lAw3llfv4etQHp1tOtqBliGucKYVoJPlocC -wFZNrOLEgRZ9cGNvNaVOAyLo7cR354c8Td+5H4Izrp6uIVE3J+JIgOKKEwARxNzfMT1xYySW+VgI -AQY8kAOPXhRARVytfg/Nceos0o30GopNqOhkZHyqgeH5NkX4t8zxXK5LLyjlSJ32lBseEbfmju5Z -DF2QYNX+UTAJjE4FqvDZZzKy2LQbVaHcsSN1JNRYPwgLfPG0Ljx0NWIuafsGt9cjZeABNS+HLnDU -90jwI56n78N/RfnLQD6Y5edOJlcx/tIkWSqlvywfM16VaGy9vN4turEc3kJ5R2rGi6xp9M04WUaf -Ygf0IatroGl6ZBtD+lRuN+rEBcDhPE+KqzWJ3WFxOXoSwYSgnxf12NluHalaDqrHT6WpHhlOI7Cv -M0/v7ykz7/m7Z7mTycyvWUwEttnliYprEA6TB9TqDL+N1QoHbUVm85e//bZASWI8A6nKz99gK9kg -Gz8a9A8FqOcGeaunTqA/ULgA8cWD4Zv/6CgrZk94mSc5d8yi/zTTcljhlVBKW8arKDVoL8yIdqwJ -r4PQ+ots1x6MrSNnkAqz6EnHNWfr7Guoo44NdCbiijCljl8p3zxe9PyRTcbVZUYN+Fl/gJCdsq9O -DIda6/zizmR1YniuLz2ysisYp/I6pNsjQlB5nVjmf4sFh93KGyFyG/1yAbYBOCJYlbcN9tNRj5cY -1CSekQZUW9VKOGJmnWdtGOA6y2D2edE7h3SYoBnoLqZw9Q/DJFVYqEoqRg+Xc1BOeYfzZ8mf8V6Z -R27zWUAid4d0fiutlkpgb9cwHohTFHs5WR2LYsd6tDc1toqZPWIdUisH6tpX+JuEisNT54xVX08d -M+CD1wCO9eJOyI4FYFUJkDCSdDj5Nqikc8MprZhkSsNYgYHdPQoetn3E1x2ajF+8qDtYyIbhhpxw -hJkyTN41EWaR/hm3j/FaHnRjehKJy+u96okzEepxfCnctq+zXqpzu6/ZgF/YjHXOyl5/vPpXEmyp 
-s0VqfxlQT1813Xtu7osgbskk2wbjgjohKWuZuk+I8RzvIJigiHqb9jNsc/647JMX6aG+drsvqDhF -mVwadF03a0ZWUbwQpynSN6J6Ct+YfRXE1rx6zFKWyndVsrWCd9+KaZzWSKquIhZze5qjG61uPeSH -kjHKxqWgsAFD532CAZE8BBq7hDv0bfJ+PtCyherocAXlZWZgo1KOjXuRUW1pZBMRK1MVRMR9uQOb -KhfynqMVnkcHWvvhLt+oVPVkRRrgGPO3I00f5yrsYZIOJVEjpBzPqRSJ4aGUFHXO75Z8Q1p6MC89 -0lvv8cafN+yuu7phzizRrMXBuvSQ4pDb8f4l64vWLwi+V55DeiEmFTUQyZxDgZx2ZbK1mZ190g+e -12rE2zhGO1mWinfIJIToSeiXjCRUndWkoPwBbzJUhIrjZ2onrLqNKp6K9BzfaQkWiX8RHhIJvFaU -s4VqTSzYV/GaGSTQi4KWEMPT4M4geXUICWdJxTWkes9HJJwXP9xhwiIpAFcyNvDKCaV6+OzO9EGw -Xegms5/9N2vuILnS0yYah7jzNPrSlBGJcxG8YflanhgspxHU+QXDuxjNEqOVPepSl9fF2bqCkAe3 -4l4FBxFKeeHXRF7b0ne39f7sHRH09vjKX7UrsZIvqhRfDpSRBc84BIDbk7CHoBpJBuotOn2gSGkT -kXvcQGDu2uCbeoB0zQQhg6vrQKjiAHyEyWpHAfp4mQTTXBBR4JuX4v4N8FOQLFqfGg+eLSj7gOi0 -2pMNaxWucOZfSlGJX1LVe/c7VH1QW6h7lpKh8gq/BlCMt5cxXQ6APtyZjEOLZZBp6AGM+vl6Yuoc -WEl4WohVCsQr09Ww6vz3PN6JJsyjR90RauiaoVRZ76aEhYxoDeVuGqo1fCep6VoKbkX46ygg3tHD -XtGPP/6XTIuSrAD5ifoMCDz7z7MzJ/vL15GSvUYqtd+kK9cM3QEjDbLfpdm1b7eZSf6bhK/m5EeH -RWhkOJ/xEDCczxHPq9loXZIUtYCJsCUhASN7LtfnGyINJeZxAC6pD8dOXQaIHth+qTUwwhsUoL9I -c4AEBDNMxAU2eSNbMwiSQnF5BnAZEzZmi7or5IFZYp95Pa1zxj0ixfnnaBNFS9xn0OA6gpBysgXi -rIwV3tkQsBPnqs8ATLawsyOAuvnqmOz/4iqxVFGcnAP3cyi4z4fFtrio3Svkx65+CGRxutqEoIRT -5VvwlUW8RMZ670G5L4aF6k1pGwLE31/MSyL2bVfwpoF6uVbHLGK6NZV+e8gUY6o89r2js7L0aooZ -iooIK35Nn+elDhjjT4cytKnsHui71g35qF8L/glDNOSjjPeuZ8lL8Tf7pmXFJcbWcydpcgjXTk03 -KLymggtomrVgWpLZPS5/xBEZS+WhE0Sakjkdp8YDF4jELUb1Lnj0QUAJNFy5AgkU0TSNJQ5b72qC -8WJr0y4Dl9nwkIo7PcugabH114IrEJBr2uWqPLd3Z7csr5c6PUIbF8wWL5wruZPwGOtnwXOo1Rfz -FnjX0ZDt3YAMMJNp6SPly+mn63dTS6KmfPTur6Rf/3MDmNTgjVgRmNXN1speCxxXbLUDJai5ztzU -jlyh60S2Av6onMMYFcUu6qYEjqeuGmnxCw0qKDjGAzedrUZdHft3CoTPvqTNXkFpldL/TsLSV1PZ -/zn6ipR/wVrbr/fUM4zhy8vHvBF4rExcM8RaLRbtwDhGPsSxepHeZMCCOzDhfwBqDMd7 -""") - -##file activate.sh -ACTIVATE_SH = convert(""" -eJytVVFvokAQfudXTLEPtTlLeo9tvMSmJpq02hSvl7u2wRUG2QR2DSxSe7n/frOACEVNLlceRHa+ -nfl25pvZDswCnoDPQ4QoTRQsENIEPci4CsBMZBq7CAsuLOYqvmYKTTj3YxnBgiXBudGBjUzBZUJI 
-BXEqgCvweIyuCjeG4eF2F5x14bcB9KQiQQWrjSddI1/oQIx6SYYeoFjzWIoIhYI1izlbhJjkKO7D -M/QEmKfO9O7WeRo/zr4P7pyHwWxkwitcgwpQ5Ej96OX+PmiFwLeVjFUOrNYKaq1Nud3nR2n8nI2m -k9H0friPTGVsUdptaxGrTEfpNVFEskxpXtUkkCkl1UNF9cgLBkx48J4EXyALuBtAwNYIjF5kcmUU -abMKmMq1ULoiRbgsDEkTSsKSGFCJ6Z8vY/2xYiSacmtyAfCDdCNTVZoVF8vSTQOoEwSnOrngBkws -MYGMBMg8/bMBLSYKS7pYEXP0PqT+ZmBT0Xuy+Pplj5yn4aM9nk72JD8/Wi+Gr98sD9eWSMOwkapD -BbUv91XSvmyVkICt2tmXR4tWmrcUCsjWOpw87YidEC8i0gdTSOFhouJUNxR+4NYBG0MftoCTD9F7 -2rTtxG3oPwY1b2HncYwhrlmj6Wq924xtGDWqfdNxap+OYxplEurnMVo9RWks+rH8qKEtx7kZT5zJ -4H7oOFclrN6uFe+d+nW2aIUsSgs/42EIPuOhXq+jEo3S6tX6w2ilNkDnIpHCWdEQhFgwj9pkk7FN -l/y5eQvRSIQ5+TrL05lewxWpt/Lbhes5cJF3mLET1MGhcKCF+40tNWnUulxrpojwDo2sObdje3Bz -N3QeHqf3D7OjEXMVV8LN3ZlvuzoWHqiUcNKHtwNd0IbvPGKYYM31nPKCgkUILw3KL+Y8l7aO1ArS -Ad37nIU0fCj5NE5gQCuC5sOSu+UdI2NeXg/lFkQIlFpdWVaWZRfvqGiirC9o6liJ9FXGYrSY9mI1 -D/Ncozgn13vJvsznr7DnkJWXsyMH7e42ljdJ+aqNDF1bFnKWFLdj31xtaJYK6EXFgqmV/ymD/ROG -+n8O9H8f5vsGOWXsL1+1k3g= -""") - -##file activate.fish -ACTIVATE_FISH = convert(""" -eJyVVWFv2jAQ/c6vuBoqQVWC9nVSNVGVCaS2VC2rNLWVZZILWAs2s52wVvvxsyEJDrjbmgpK7PP5 -3bt3d22YLbmGlGcIq1wbmCPkGhPYcLMEEsGciwGLDS+YwSjlekngLFVyBe73GXSXxqw/DwbuTS8x -yyKpFr1WG15lDjETQhpQuQBuIOEKY5O9tlppLqxHKSDByjVAPwEy+mXtCq5MzjIUBTCRgEKTKwFG -gpBqxTLYXgN2myspVigMaYF92tZSowGZJf4mFExxNs9Qb614CgZtmH0BpEOn11f0cXI/+za8pnfD -2ZjA1sg9zlV/8QvcMhxbNu0QwgYokn/d+n02nt6Opzcjcnx1vXcIoN74O4ymWQXmHURfJw9jenc/ -vbmb0enj6P5+cuVhqlKm3S0u2XRtRbA2QQAhV7VhBF0rsgUX9Ur1rBUXJgVSy8O751k8mzY5OrKH -RW3eaQhYGTr8hrXO59ALhxQ83mCsDLAid3T72CCSdJhaFE+fXgicXAARUiR2WeVO37gH3oYHzFKo -9k7CaPZ1UeNwH1tWuXA4uFKYYcEa8vaKqXl7q1UpygMPhFLvlVKyNzsSM3S2km7UBOl4xweUXk5u -6e3wZmQ9leY1XE/Ili670tr9g/5POBBpGIJXCCF79L1siarl/dbESa8mD8PL61GpzqpzuMS7tqeB -1YkALrRBloBMbR9yLcVx7frQAgUqR7NZIuzkEu110gbNit1enNs82Rx5utq7Z3prU78HFRgulqNC -OTwbqJa9vkJFclQgZSjbKeBgSsUtCtt9D8OwAbIVJuewQdfvQRaoFE9wd1TmCuRG7OgJ1bVXGHc7 -z5WDL/WW36v2oi37CyVBak61+yPBA9C1qqGxzKQqZ0oPuocU9hpud0PIp8sDHkXR1HKkNlzjuUWA 
-a0enFUyzOWZA4yXGP+ZMI3Tdt2OuqU/SO4q64526cPE0A7ZyW2PMbWZiZ5HamIZ2RcCKLXhcDl2b -vXL+eccQoRzem80mekPDEiyiWK4GWqZmwxQOmPM0eIfgp1P9cqrBsewR2p/DPMtt+pfcYM+Ls2uh -hALufTAdmGl8B1H3VPd2af8fQAc4PgqjlIBL9cGQqNpXaAwe3LrtVn8AkZTUxg== -""") - -##file activate.csh -ACTIVATE_CSH = convert(""" -eJx9VG1P2zAQ/u5fcYQKNgTNPtN1WxlIQ4KCUEGaxuQ6yYVYSuzKdhqVX7+zk3bpy5YPUXL3PPfc -ne98DLNCWshliVDV1kGCUFvMoJGugMjq2qQIiVSxSJ1cCofD1BYRnOVGV0CfZ0N2DD91DalQSjsw -tQLpIJMGU1euvPe7QeJlkKzgWixlhnAt4aoUVsLnLBiy5NtbJWQ5THX1ZciYKKWwkOFaE04dUm6D -r/zh7pq/3D7Nnid3/HEy+wFHY/gEJydg0aFaQrBFgz1c5DG1IhTs+UZgsBC2GMFBlaeH+8dZXwcW -VPvCjXdlAvCfQsE7al0+07XjZvrSCUevR5dnkVeKlFYZmUztG4BdzL2u9KyLVabTU0bdfg7a0hgs -cSmUg6UwUiQl2iHrcbcVGNvPCiLOe7+cRwG13z9qRGgx2z6DHjfm/Op2yqeT+xvOLzs0PTKHDz2V -tkckFHoQfQRXoGJAj9el0FyJCmEMhzgMS4sB7KPOE2ExoLcSieYwDvR+cP8cg11gKkVJc2wRcm1g -QhYFlXiTaTfO2ki0fQoiFM4tLuO4aZrhOzqR4dIPcWx17hphMBY+Srwh7RTyN83XOWkcSPh1Pg/k -TXX/jbJTbMtUmcxZ+/bbqOsy82suFQg/BhdSOTRhMNBHlUarCpU7JzBhmkKmRejKOQzayQe6MWoa -n1wqWmuh6LZAaHxcdeqIlVLhIBJdO9/kbl0It2oEXQj+eGjJOuvOIR/YGRqvFhttUB2XTvLXYN2H -37CBdbW2W7j2r2+VsCn0doVWcFG1/4y1VwBjfwAyoZhD -""") - -##file activate.bat -ACTIVATE_BAT = convert(""" -eJx9UdEKgjAUfW6wfxjiIH+hEDKUFHSKLCMI7kNOEkIf9P9pTJ3OLJ/03HPPPed4Es9XS9qqwqgT -PbGKKOdXL4aAFS7A4gvAwgijuiKlqOpGlATS2NeMLE+TjJM9RkQ+SmqAXLrBo1LLIeLdiWlD6jZt -r7VNubWkndkXaxg5GO3UaOOKS6drO3luDDiO5my3iA0YAKGzPRV1ack8cOdhysI0CYzIPzjSiH5X -0QcvC8Lfaj0emsVKYF2rhL5L3fCkVjV76kShi59NHwDniAHzkgDgqBcwOgTMx+gDQQqXCw== -""") - -##file deactivate.bat -DEACTIVATE_BAT = convert(""" -eJxzSE3OyFfIT0vj4ipOLVEI8wwKCXX0iXf1C7Pl4spMU0hJTcvMS01RiPf3cYmHyQYE+fsGhCho -cCkAAUibEkTEVhWLMlUlLk6QGixStlyaeCyJDPHw9/Pw93VFsQguim4ZXAJoIUw5DhX47XUM8UCx -EchHtwsohN1bILUgw61c/Vy4AJYPYm4= -""") - -##file activate.ps1 -ACTIVATE_PS = convert(""" -eJylWdmS40Z2fVeE/oHT6rCloNUEAXDThB6wAyQAEjsB29GBjdgXYiWgmC/zgz/Jv+AEWNVd3S2N -xuOKYEUxM+/Jmzfvcm7W//zXf/+wUMOoXtyi1F9kbd0sHH/hFc2iLtrK9b3FrSqyxaVQwr8uhqJd 
-uHaeg9mqzRdR8/13Pyy8qPLdJh0+LMhi0QCoXxYfFh9WtttEnd34H8p6/f1300KauwrULws39e18 -0ZaLNm9rgN/ZVf3h++/e124Vlc0vKsspHy+Yyi5+XbzPhijvCtduoiL/kA1ukWV27n0o7Sb8LIFj -CvWR5GQgUJdp1Pw8TS9+rPy6SDv/+e3d+0+4qw8f3v20+PliV37efEYBAB9FTKC+RHn/Cfxn3rdv -00Fube5O+iyCtHDs9BfPfz3q4sfFv9d91Ljhfy7ei0VO+nVTtdOkv/jpt0l2AX6iG1jXgKnnDuD4 -ke2k/i8fzzz5UedkVcP4pwF+Wvz2FJl+3vt598urXf5Y6LNA5WcFOP7r0sW7b9a+W/xcu0Xpv5zk -Kfq3P9Dz9di/fCxS72MXVU1rpx9L4Bxl85Wmn5a+zP76Zuh3pL9ROWr87PN+//GHIl+oOtvn9XSU -qH+p0gQBFnx1uV+JLH5O5zv+PXW+WepXVVHZT0+oQezkIATcIm+ivPV/z5J/+cYj3ir4w0Lx09vC -e5n/y5/Y5LPPfdrqb88ga/PabxZRVfmp39l588m/6u+/e+OpP+dF7n1WZpJ9//Z4v372fDDz9eHB -7Juvs/BLMHzrxL9+9twXpJfhd1/DrpQ5Euu/vlss3wp9HXC/54C/Ld69m6zwdx3tC0d8daSv0V8B -n4b9YYF53sJelJV/ix6LZspw/sJtqyl5LJ5r/23htA1Imfm/gt9R7dqVB1LjhydAX4Gb+zksQF59 -9+P7H//U+376afFuvh2/T6P85Xr/5c8C6OXyFY4BGuN+EE0+GeR201b+wkkLN5mmBY5TfMw8ngqL -CztXxCSXKMCYrRIElWkEJlEPYsSOeKBVZCAQTKBhApMwRFQzmCThE0YQu2CdEhgjbgmk9GluHpfR -/hhwJCZhGI5jt5FsAkOrObVyE6g2y1snyhMGFlDY1x+BoHpCMulTj5JYWNAYJmnKpvLxXgmQ8az1 -4fUGxxcitMbbhDFcsiAItg04E+OSBIHTUYD1HI4FHH4kMREPknuYRMyhh3AARWMkfhCketqD1CWJ -mTCo/nhUScoQcInB1hpFhIKoIXLo5jLpwFCgsnLCx1QlEMlz/iFEGqzH3vWYcpRcThgWnEKm0QcS -rA8ek2a2IYYeowUanOZOlrbWSJUC4c7y2EMI3uJPMnMF/SSXdk6E495VLhzkWHps0rOhKwqk+xBI -DhJirhdUCTamMfXz2Hy303hM4DFJ8QL21BcPBULR+gcdYxoeiDqOFSqpi5B5PUISfGg46gFZBPo4 -jdh8lueaWuVSMTURfbAUnLINr/QYuuYoMQV6l1aWxuZVTjlaLC14UzqZ+ziTGDzJzhiYoPLrt3uI -tXkVR47kAo09lo5BD76CH51cTt1snVpMOttLhY93yxChCQPI4OBecS7++h4p4Bdn4H97bJongtPk -s9gQnXku1vzsjjmX4/o4YUDkXkjHwDg5FXozU0fW4y5kyeYW0uJWlh536BKr0kMGjtzTkng6Ep62 -uTWnQtiIqKnEsx7e1hLtzlXs7Upw9TwEnp0t9yzCGgUJIZConx9OHJArLkRYW0dW42G9OeR5Nzwk -yk1mX7du5RGHT7dka7N3AznmSif7y6tuKe2N1Al/1TUPRqH6E2GLVc27h9IptMLkCKQYRqPQJgzV -2m6WLsSipS3v3b1/WmXEYY1meLEVIU/arOGVkyie7ZsH05ZKpjFW4cpY0YkjySpSExNG2TS8nnJx -nrQmWh2WY3cP1eISP9wbaVK35ZXc60yC3VN/j9n7UFoK6zvjSTE2+Pvz6Mx322rnftfP8Y0XKIdv -Qd7AfK0nexBTMqRiErvCMa3Hegpfjdh58glW2oNMsKeAX8x6YJLZs9K8/ozjJkWL+JmECMvhQ54x 
-9rsTHwcoGrDi6Y4I+H7yY4/rJVPAbYymUH7C2D3uiUS3KQ1nrCAUkE1dJMneDQIJMQQx5SONxoEO -OEn1/Ig1eBBUeEDRuOT2WGGGE4bNypBLFh2PeIg3bEbg44PHiqNDbGIQm50LW6MJU62JHCGBrmc9 -2F7WBJrrj1ssnTAK4sxwRgh5LLblhwNAclv3Gd+jC/etCfyfR8TMhcWQz8TBIbG8IIyAQ81w2n/C -mHWAwRzxd3WoBY7BZnsqGOWrOCKwGkMMNfO0Kci/joZgEocLjNnzgcmdehPHJY0FudXgsr+v44TB -I3jnMGnsK5veAhgi9iXGifkHMOC09Rh9cAw9sQ0asl6wKMk8mpzFYaaDSgG4F0wisQDDBRpjCINg -FIxhlhQ31xdSkkk6odXZFpTYOQpOOgw9ugM2cDQ+2MYa7JsEirGBrOuxsQy5nPMRdYjsTJ/j1iNw -FeSt1jY2+dd5yx1/pzZMOQXUIDcXeAzR7QlDRM8AMkUldXOmGmvYXPABjxqkYKO7VAY6JRU7kpXr -+Epu2BU3qFFXClFi27784LrDZsJwbNlDw0JzhZ6M0SMXE4iBHehCpHVkrQhpTFn2dsvsZYkiPEEB -GSEAwdiur9LS1U6P2U9JhGp4hnFpJo4FfkdJHcwV6Q5dV1Q9uNeeu7rV8PAjwdFg9RLtroifOr0k -uOiRTo/obNPhQIf42Fr4mtThWoSjitEdAmFW66UCe8WFjPk1YVNpL9srFbond7jrLg8tqAasIMpy -zkH0SY/6zVAwJrEc14zt14YRXdY+fcJ4qOd2XKB0/Kghw1ovd11t2o+zjt+txndo1ZDZ2T+uMVHT -VSXhedBAHoJIID9xm6wPQI3cXY+HR7vxtrJuCKh6kbXaW5KkVeJsdsjqsYsOwYSh0w5sMbu7LF8J -5T7U6LJdiTx+ca7RKlulGgS5Z1JSU2Llt32cHFipkaurtBrvNX5UtvNZjkufZ/r1/XyLl6yOpytL -Km8Fn+y4wkhlqZP5db0rooqy7xdL4wxzFVTX+6HaxuQJK5E5B1neSSovZ9ALB8091dDbbjVxhWNY -Ve5hn1VnI9OF0wpvaRm7SZuC1IRczwC7GnkhPt3muHV1YxUJfo+uh1sYnJy+vI0ZwuPV2uqWJYUH -bmBsi1zmFSxHrqwA+WIzLrHkwW4r+bad7xbOzJCnKIa3S3YvrzEBK1Dc0emzJW+SqysQfdEDorQG -9ZJlbQzEHQV8naPaF440YXzJk/7vHGK2xwuP+Gc5xITxyiP+WQ4x18oXHjFzCBy9kir1EFTAm0Zq -LYwS8MpiGhtfxiBRDXpxDWxk9g9Q2fzPPAhS6VFDAc/aiNGatUkPtZIStZFQ1qD0IlJa/5ZPAi5J -ySp1ETDomZMnvgiysZSBfMikrSDte/K5lqV6iwC5q7YN9I1dBZXUytDJNqU74MJsUyNNLAPopWK3 -tzmLkCiDyl7WQnj9sm7Kd5kzgpoccdNeMw/6zPVB3pUwMgi4C7hj4AMFAf4G27oXH8NNT9zll/sK -S6wVlQwazjxWKWy20ZzXb9ne8ngGalPBWSUSj9xkc1drsXkZ8oOyvYT3e0rnYsGwx85xZB9wKeKg -cJKZnamYwiaMymZvzk6wtDUkxmdUg0mPad0YHtvzpjEfp2iMxvORhnx0kCVLf5Qa43WJsVoyfEyI -pzmf8ruM6xBr7dnBgzyxpqXuUPYaKahOaz1LrxNkS/Q3Ae5AC+xl6NbxAqXXlzghZBZHmOrM6Y6Y -ctAkltwlF7SKEsShjVh7QHuxMU0a08/eiu3x3M+07OijMcKFFltByXrpk8w+JNnZpnp3CfgjV1Ax -gUYCnWwYow42I5wHCcTzLXK0hMZN2DrPM/zCSqe9jRSlJnr70BPE4+zrwbk/xVIDHy2FAQyHoomT 
-Tt5jiM68nBQut35Y0qLclLiQrutxt/c0OlSqXAC8VrxW97lGoRWzhOnifE2zbF05W4xuyhg7JTUL -aqJ7SWDywhjlal0b+NLTpERBgnPW0+Nw99X2Ws72gOL27iER9jgzj7Uu09JaZ3n+hmCjjvZpjNst -vOWWTbuLrg+/1ltX8WpPauEDEvcunIgTxuMEHweWKCx2KQ9DU/UKdO/3za4Szm2iHYL+ss9AAttm -gZHq2pkUXFbV+FiJCKrpBms18zH75vax5jSo7FNunrVWY3Chvd8KKnHdaTt/6ealwaA1x17yTlft -8VBle3nAE+7R0MScC3MJofNCCkA9PGKBgGMYEwfB2QO5j8zUqa8F/EkWKCzGQJ5EZ05HTly1B01E -z813G5BY++RZ2sxbQS8ZveGPJNabp5kXAeoign6Tlt5+L8i5ZquY9+S+KEUHkmYMRFBxRrHnbl2X -rVemKnG+oB1yd9+zT+4c43jQ0wWmQRR6mTCkY1q3VG05Y120ZzKOMBe6Vy7I5Vz4ygPB3yY4G0FP -8RxiMx985YJPXsgRU58EuHj75gygTzejP+W/zKGe78UQN3yOJ1aMQV9hFH+GAfLRsza84WlPLAI/ -9G/5JdcHftEfH+Y3/fHUG7/o8bv98dzzy3e8S+XCvgqB+VUf7sH0yDHpONdbRE8tAg9NWOzcTJ7q -TuAxe/AJ07c1Rs9okJvl1/0G60qvbdDzz5zO0FuPFQIHNp9y9Bd1CufYVx7dB26mAxwa8GMNrN/U -oGbNZ3EQ7inLzHy5tRg9AXJrN8cB59cCUBeCiVO7zKM0jU0MamhnRThkg/NMmBOGb6StNeD9tDfA -7czsAWopDdnGoXUHtA+s/k0vNPkBcxEI13jVd/axp85va3LpwGggXXWw12Gwr/JGAH0b8CPboiZd -QO1l0mk/UHukud4C+w5uRoNzpCmoW6GbgbMyaQNkga2pQINB18lOXOCJzSWPFOhZcwzdgrsQnne7 -nvjBi+7cP2BbtBeDOW5uOLGf3z94FasKIguOqJl+8ss/6Kumns4cuWbqq5592TN/RNIbn5Qo6qbi -O4F0P9txxPAwagqPlftztO8cWBzdN/jz3b7GD6JHYP/Zp4ToAMaA74M+EGSft3hEGMuf8EwjnTk/ -nz/P7SLipB/ogQ6xNX0fDqNncMCfHqGLCMM0ZzFa+6lPJYQ5p81vW4HkCvidYf6kb+P/oB965g8K -C6uR0rdjX1DNKc5pOSTquI8uQ6KXxYaKBn+30/09tK4kMpJPgUIQkbENEPbuezNPPje2Um83SgyX -GTCJb6MnGVIpgncdQg1qz2bvPfxYD9fewCXDomx9S+HQJuX6W3VAL+v5WZMudRQZk9ZdOk6GIUtC -PqEb/uwSIrtR7/edzqgEdtpEwq7p2J5OQV+RLrmtTvFwFpf03M/VrRyTZ73qVod7v7Jh2Dwe5J25 -JqFOU2qEu1sP+CRotklediycKfLjeIZzjJQsvKmiGSNQhxuJpKa+hoWUizaE1PuIRGzJqropwgVB -oo1hr870MZLgnXF5ZIpr6mF0L8aSy2gVnTAuoB4WEd4d5NPVC9TMotYXERKlTcwQ2KiB/C48AEfH -Qbyq4CN8xTFnTvf/ebOc3isnjD95s0QF0nx9s+y+zMmz782xL0SgEmRpA3x1w1Ff9/74xcxKEPdS -IEFTz6GgU0+BK/UZ5Gwbl4gZwycxEw+Kqa5QmMkh4OzgzEVPnDAiAOGBFaBW4wkDmj1G4RyElKgj -NlLCq8zsp085MNh/+R4t1Q8yxoSv8PUpTt7izZwf2BTHZZ3pIZpUIpuLkL1nNL6sYcHqcKm237wp -T2+RCjgXweXd2Zp7ZM8W6dG5bZsqo0nrJBTx8EC0+CQQdzEGnabTnkzofu1pYkWl4E7XSniECdxy 
-vLYavPMcL9LW5SToJFNnos+uqweOHriUZ1ntIYZUonc7ltEQ6oTRtwOHNwez2sVREskHN+bqG3ua -eaEbJ8XpyO8CeD9QJc8nbLP2C2R3A437ISUNyt5Yd0TbDNcl11/DSsOzdbi/VhCC0KE6v1vqVNkq -45ZnG6fiV2NwzInxCNth3BwL0+8814jE6+1W1EeWtpWbSZJOJNYXmWRXa7vLnAljE692eHjZ4y5u -y1u63De0IzKca7As48Z3XshVF+3XiLNz0JIMh/JOpbiNLlMi672uO0wYzOCZjRxcxj3D+gVenGIE -MvFUGGXuRps2RzMcgWIRolHXpGUP6sMsQt1hspUBnVKUn/WQj2u6j3SXd9Xz0QtEzoM7qTu5y7gR -q9gNNsrlEMLdikBt9bFvBnfbUIh6voTw7eDsyTmPKUvF0bHqWLbHe3VRHyRZnNeSGKsB73q66Vsk -taxWYmwz1tYVFG/vOQhlM0gUkyvIab3nv2caJ1udU1F3pDMty7stubTE4OJqm0i0ECfrJIkLtraC -HwRWKzlqpfhEIqYH09eT9WrOhQyt8YEoyBlnXtAT37WHIQ03TIuEHbnRxZDdLun0iok9PUC79prU -m5beZzfQUelEXnhzb/pIROKx3F7qCttYIFGh5dXNzFzID7u8vKykA8Uejf7XXz//S4nKvW//ofS/ -QastYw== -""") - -##file distutils-init.py -DISTUTILS_INIT = convert(""" -eJytV1uL4zYUfvevOE0ottuMW9q3gVDa3aUMXXbLMlDKMBiNrSTqOJKRlMxkf33PkXyRbGe7Dw2E -UXTu37lpxLFV2oIyifAncxmOL0xLIfcG+gv80x9VW6maw7o/CANSWWBwFtqeWMPlGY6qPjV8A0bB -C4eKSTgZ5LRgFeyErMEeOBhbN+Ipgeizhjtnhkn7DdyjuNLPoCS0l/ayQTG0djwZC08cLXozeMss -aG5EzQ0IScpnWtHSTXuxByV/QCmxE7y+eS0uxWeoheaVVfqSJHiU7Mhhi6gULbOHorshkrEnKxpT -0n3A8Y8SMpuwZx6aoix3ouFlmW8gHRSkeSJ2g7hU+kiHLDaQw3bmRDaTGfTnty7gPm0FHbIBg9U9 -oh1kZzAFLaue2R6htPCtAda2nGlDSUJ4PZBgCJBGVcwKTAMz/vJiLD+Oin5Z5QlvDPdulC6EsiyE -NFzb7McNTKJzbJqzphx92VKRFY1idenzmq3K0emRcbWBD0ryqc4NZGmKOOOX9Pz5x+/l27tP797c -f/z0d+4NruGNai8uAM0bfsYaw8itFk8ny41jsfpyO+BWlpqfhcG4yxLdi/0tQqoT4a8Vby382mt8 -p7XSo7aWGdPBc+b6utaBmCQ7rQKQoWtAuthQCiold2KfJIPTT8xwg9blPumc+YDZC/wYGdAyHpJk -vUbHbHWAp5No6pK/WhhLEWrFjUwtPEv1Agf8YmnsuXUQYkeZoHm8ogP16gt2uHoxcEMdf2C6pmbw -hUMsWGhanboh4IzzmsIpWs134jVPqD/c74bZHdY69UKKSn/+KfVhxLgUlToemayLMYQOqfEC61bh -cbhwaqoGUzIyZRFHPmau5juaWqwRn3mpWmoEA5nhzS5gog/5jbcFQqOZvmBasZtwYlG93k5GEiyw -buHhMWLjDarEGpMGB2LFs5nIJkhp/nUmZneFaRth++lieJtHepIvKgx6PJqIlD9X2j6pG1i9x3pZ -5bHuCPFiirGHeO7McvoXkz786GaKVzC9DSpnOxJdc4xm6NSVq7lNEnKdVlnpu9BNYoKX2Iq3wvgh -gGEUM66kK6j4NiyoneuPLSwaCWDxczgaolEWpiMyDVDb7dNuLAbriL8ig8mmeju31oNvQdpnvEPC 
-1vAXbWacGRVrGt/uXN/gU0CDDwgooKRrHfTBb1/s9lYZ8ZqOBU0yLvpuP6+K9hLFsvIjeNhBi0KL -MlOuWRn3FRwx5oHXjl0YImUx0+gLzjGchrgzca026ETmYJzPD+IpuKzNi8AFn048Thd63OdD86M6 -84zE8yQm0VqXdbbgvub2pKVnS76icBGdeTHHXTKspUmr4NYo/furFLKiMdQzFjHJNcdAnMhltBJK -0/IKX3DVFqvPJ2dLE7bDBkH0l/PJ29074+F0CsGYOxsb7U3myTUncYfXqnLLfa6sJybX4g+hmcjO -kMRBfA1JellfRRKJcyRpxdS4rIl6FdmQCWjo/o9Qz7yKffoP4JHjOvABcRn4CZIT2RH4jnxmfpVG -qgLaAvQBNfuO6X0/Ux02nb4FKx3vgP+XnkX0QW9pLy/NsXgdN24dD3LxO2Nwil7Zlc1dqtP3d7/h -kzp1/+7hGBuY4pk0XD/0Ao/oTe/XGrfyM773aB7iUhgkpy+dwAMalxMP0DrBcsVw/6p25+/hobP9 -GBknrWExDhLJ1bwt1NcCNblaFbMKCyvmX0PeRaQ= -""") - -##file distutils.cfg -DISTUTILS_CFG = convert(""" -eJxNj00KwkAMhfc9xYNuxe4Ft57AjYiUtDO1wXSmNJnK3N5pdSEEAu8nH6lxHVlRhtDHMPATA4uH -xJ4EFmGbvfJiicSHFRzUSISMY6hq3GLCRLnIvSTnEefN0FIjw5tF0Hkk9Q5dRunBsVoyFi24aaLg -9FDOlL0FPGluf4QjcInLlxd6f6rqkgPu/5nHLg0cXCscXoozRrP51DRT3j9QNl99AP53T2Q= -""") - -##file activate_this.py -ACTIVATE_THIS = convert(""" -eJyNU01v2zAMvetXEB4K21jmDOstQA4dMGCHbeihlyEIDMWmG62yJEiKE//7kXKdpN2KzYBt8euR -fKSyLPs8wiEo8wh4wqZTGou4V6Hm0wJa1cSiTkJdr8+GsoTRHuCotBayiWqQEYGtMCgfD1KjGYBe -5a3p0cRKiAe2NtLADikftnDco0ko/SFEVgEZ8aRC5GLux7i3BpSJ6J1H+i7A2CjiHq9z7JRZuuQq -siwTIvpxJYCeuWaBpwZdhB+yxy/eWz+ZvVSU8C4E9FFZkyxFsvCT/ZzL8gcz9aXVE14Yyp2M+2W0 -y7n5mp0qN+avKXvbsyyzUqjeWR8hjGE+2iCE1W1tQ82hsCZN9UzlJr+/e/iab8WfqsmPI6pWeUPd -FrMsd4H/55poeO9n54COhUs+sZNEzNtg/wanpjpuqHJaxs76HtZryI/K3H7KJ/KDIhqcbJ7kI4ar -XL+sMgXnX0D+Te2Iy5xdP8yueSlQB/x/ED2BTAtyE3K4SYUN6AMNfbO63f4lBW3bUJPbTL+mjSxS -PyRfJkZRgj+VbFv+EzHFi5pKwUEepa4JslMnwkowSRCXI+m5XvEOvtuBrxHdhLalG0JofYBok6qj -YdN2dEngUlbC4PG60M1WEN0piu7Nq7on0mgyyUw3iV1etLo6r/81biWdQ9MWHFaePWZYaq+nmp+t -s3az+sj7eA0jfgPfeoN1 -""") - -MH_MAGIC = 0xfeedface -MH_CIGAM = 0xcefaedfe -MH_MAGIC_64 = 0xfeedfacf -MH_CIGAM_64 = 0xcffaedfe -FAT_MAGIC = 0xcafebabe -BIG_ENDIAN = '>' -LITTLE_ENDIAN = '<' -LC_LOAD_DYLIB = 0xc -maxint = majver == 3 and getattr(sys, 'maxsize') or getattr(sys, 'maxint') - - -class fileview(object): - """ - A proxy for file-like objects 
that exposes a given view of a file. - Modified from macholib. - """ - - def __init__(self, fileobj, start=0, size=maxint): - if isinstance(fileobj, fileview): - self._fileobj = fileobj._fileobj - else: - self._fileobj = fileobj - self._start = start - self._end = start + size - self._pos = 0 - - def __repr__(self): - return '<fileview [%d, %d] %r>' % ( - self._start, self._end, self._fileobj) - - def tell(self): - return self._pos - - def _checkwindow(self, seekto, op): - if not (self._start <= seekto <= self._end): - raise IOError("%s to offset %d is outside window [%d, %d]" % ( - op, seekto, self._start, self._end)) - - def seek(self, offset, whence=0): - seekto = offset - if whence == os.SEEK_SET: - seekto += self._start - elif whence == os.SEEK_CUR: - seekto += self._start + self._pos - elif whence == os.SEEK_END: - seekto += self._end - else: - raise IOError("Invalid whence argument to seek: %r" % (whence,)) - self._checkwindow(seekto, 'seek') - self._fileobj.seek(seekto) - self._pos = seekto - self._start - - def write(self, bytes): - here = self._start + self._pos - self._checkwindow(here, 'write') - self._checkwindow(here + len(bytes), 'write') - self._fileobj.seek(here, os.SEEK_SET) - self._fileobj.write(bytes) - self._pos += len(bytes) - - def read(self, size=maxint): - assert size >= 0 - here = self._start + self._pos - self._checkwindow(here, 'read') - size = min(size, self._end - here) - self._fileobj.seek(here, os.SEEK_SET) - bytes = self._fileobj.read(size) - self._pos += len(bytes) - return bytes - - -def read_data(file, endian, num=1): - """ - Read a given number of 32-bits unsigned integers from the given file - with the given endianness. - """ - res = struct.unpack(endian + 'L' * num, file.read(num * 4)) - if len(res) == 1: - return res[0] - return res - - -def mach_o_change(path, what, value): - """ - Replace a given name (what) in any LC_LOAD_DYLIB command found in - the given binary with a new name (value), provided it's shorter. 
- """ - - def do_macho(file, bits, endian): - # Read Mach-O header (the magic number is assumed read by the caller) - cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags = read_data(file, endian, 6) - # 64-bits header has one more field. - if bits == 64: - read_data(file, endian) - # The header is followed by ncmds commands - for n in range(ncmds): - where = file.tell() - # Read command header - cmd, cmdsize = read_data(file, endian, 2) - if cmd == LC_LOAD_DYLIB: - # The first data field in LC_LOAD_DYLIB commands is the - # offset of the name, starting from the beginning of the - # command. - name_offset = read_data(file, endian) - file.seek(where + name_offset, os.SEEK_SET) - # Read the NUL terminated string - load = file.read(cmdsize - name_offset).decode() - load = load[:load.index('\0')] - # If the string is what is being replaced, overwrite it. - if load == what: - file.seek(where + name_offset, os.SEEK_SET) - file.write(value.encode() + '\0'.encode()) - # Seek to the next command - file.seek(where + cmdsize, os.SEEK_SET) - - def do_file(file, offset=0, size=maxint): - file = fileview(file, offset, size) - # Read magic number - magic = read_data(file, BIG_ENDIAN) - if magic == FAT_MAGIC: - # Fat binaries contain nfat_arch Mach-O binaries - nfat_arch = read_data(file, BIG_ENDIAN) - for n in range(nfat_arch): - # Read arch header - cputype, cpusubtype, offset, size, align = read_data(file, BIG_ENDIAN, 5) - do_file(file, offset, size) - elif magic == MH_MAGIC: - do_macho(file, 32, BIG_ENDIAN) - elif magic == MH_CIGAM: - do_macho(file, 32, LITTLE_ENDIAN) - elif magic == MH_MAGIC_64: - do_macho(file, 64, BIG_ENDIAN) - elif magic == MH_CIGAM_64: - do_macho(file, 64, LITTLE_ENDIAN) - - assert(len(what) >= len(value)) - do_file(open(path, 'r+b')) - - -if __name__ == '__main__': - main() - -## TODO: -## Copy python.exe.manifest -## Monkeypatch distutils.sysconfig diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/__init__.py 
b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/backup/benchmarks.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/backup/benchmarks.py deleted file mode 100644 index 7a19da0684e4006c25dda36c5736a7e698090900..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/backup/benchmarks.py +++ /dev/null @@ -1,493 +0,0 @@ - -# NOTES: Batch13 (Baseline) Batch14 - With ErrorSens (10, 25, 35) - -# Batch 9: No Error Sens. Min : P3 -# Batch 10: No Error Sens + More Runs for Loss1 and Loss2. Min: P3 -# Batch 11: Error Sens: Skipping 30% elems in each : Min: P3. More runs in Loss1 (4000) and Loss2 (2000) -# Batch 12: Error Sens: 10, 25, 35, for Loss1, 2, 3, respectively, Min: P3. 1000 Runs for All -# Batch 13: No Error Sens: Equal Runs (1000) for all. Min: P1 -# Batch 14: Reruning Batch12 with bugFix! -# Batch 15: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3 - -# Batch 18: Batch13 (Basline) + ParetoCurve (1500 Runs) - BUGGY IGNORE!!! 
- -# Batch 19: (Basline) + ParetoCurve + 2 runs in Tuning Phase (1500 Runs) - -# Batch 20: 3-Skip levels + + 2 runs + 1500 Runs + EnergyBandSize now % of Max (Compare against Batch19 - - -# Batch 200: AlgoTuner - 1000 images - 1500 runs (IGNORE) -# Batch 201: AlgoTuner - 2000 images - 1500 runs -# Batch 202: AlgoTuner - 2000 images - 500 runs -# Batch 203: AlgoTuner - 2000 images - 3000 runs - - -#---- CHANGES: i) Reshufled inputs ii) 3K images for tuning -# Batch 210: 3K images, 1000 runs (1500 resnet), no FP32 used in tuning -# Batch 211: Same as Batch-210 + uses tensorConvPerfCuda*Half* - - -#batch_id = "batch210" -#batch_id = "batch211" -#batch_id = "batch210" - - -batch_id = "batch310" - - -class Benchmark: - def __init__(self): - self.tuner_binary = "" - self.promise_binary = "" - self.tuner_accuracy = 0 - self.promise_accuracy = 0 - self.num_flags = 0 - self.num_layers = 0 - self.autotuner_runs = 0 - self.error_range_1 = 0 - self.error_range_2 = 0 - self.result_dir_1 = "" - self.result_dir_2 = "" - self.promise_result_dir_1 = "" - self.promise_result_dir_2 = "" - - - -bench_tuner_data = {} - -# FIXIT: Fix Variable Names below -Alexnet1 = Benchmark() -Alexnet1.tuner_binary = "alexnet_cifar10_tuner" -Alexnet1.fp16_binary = "alexnet_half" -Alexnet1.promise_binary = "alexnet_promise" -Alexnet1.validation_binary = "alexnet_valid" -Alexnet1.num_flags = 21 -Alexnet1.num_layers = 6 -Alexnet1.error_range_1 = 10 -Alexnet1.error_range_2 = 13 -Alexnet1.start_promise_range = 1 -Alexnet1.skip_layers = 0 -#Alexnet1.skip_layer_str = "0" -Alexnet1.skip_layer_str = "5_0" - -Alexnet1.base_dir = "../build_tuner/tuner_results/alexnet_cifar10/" -Alexnet1.result_dir_1 = "../build_tuner/tuner_results/alexnet_cifar10/loss_1/" + batch_id -Alexnet1.result_dir_2 = "../build_tuner/tuner_results/alexnet_cifar10/loss_2/" + batch_id -Alexnet1.result_dir_3 = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/" + batch_id - -Alexnet1.tensor_desc_file = 
"tuner_results/alexnet_cifar10/alexnet_tensors.txt" -Alexnet1.layer_file = "tuner_results/alexnet_cifar10/alexnet_layers.txt" -Alexnet1.cost_file = "../build_tuner/tuner_results/alexnet_cifar10/op_cost.txt" -Alexnet1.layer_knobs = "../opentuner/data/alexnet/knobs.txt" - -#Alexnet1.loss1_result_file = "tuner_results/alexnet2_cifar10/alexnet_layers.txt" -Alexnet1.loss1_result_file = "tuner_results/alexnet_cifar10/loss_1/promise_tuned_confs/promise_confs.txt" -Alexnet1.loss2_result_file = "tuner_results/alexnet_cifar10/loss_2/promise_tuned_confs/promise_confs.txt" - -Alexnet1.autotuner_runs = 1000 -Alexnet1.tuner_accuracy = 79.9 -#Alexnet1.promise_accuracy = 79.9 -Alexnet1.promise_accuracy = 78.86 -Alexnet1.validation_accuracy = 79.19 - -bench_tuner_data["alexnet_cifar10"] = Alexnet1 - - -Alexnet2 = Benchmark() -Alexnet2.tuner_binary = "alexnet2_cifar10_tuner" -Alexnet2.fp16_binary = "alexnet2_half" -Alexnet2.promise_binary = "alexnet2_promise" -Alexnet2.validation_binary = "alexnet2_valid" -Alexnet2.num_flags = 23 -Alexnet2.num_layers = 7 -Alexnet2.error_range_1 = 10 -Alexnet2.error_range_2 = 13 -Alexnet2.start_promise_range = 1 -#Alexnet2.skip_layer_str = "0" -Alexnet2.skip_layer_str = "6_1_0" - -Alexnet2.base_dir = "../build_tuner/tuner_results/alexnet2_cifar10/" -Alexnet2.result_dir_1 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_1/" + batch_id -Alexnet2.result_dir_2 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_2/" + batch_id -Alexnet2.result_dir_3 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_3/" + batch_id -Alexnet2.tensor_desc_file = "tuner_results/alexnet2_cifar10/alexnet2_tensors.txt" -Alexnet2.layer_file = "tuner_results/alexnet2_cifar10/alexnet2_layers.txt" -Alexnet2.cost_file = "../build_tuner/tuner_results/alexnet2_cifar10/op_cost.txt" -Alexnet2.layer_knobs = "../opentuner/data/alexnet2/knobs.txt" -#Alexnet2.loss1_result_file = "tuner_results/alexnet2_cifar10/loss_1/promise_tuned_confs/promise_confs.txt" 
-#Alexnet2.loss2_result_file = "tuner_results/alexnet2_cifar10/loss_2/promise_tuned_confs/promise_confs.txt" -Alexnet2.autotuner_runs = 1000 -Alexnet2.tuner_accuracy = 84.19 -#Alexnet2.promise_accuracy = 84.19 -Alexnet2.promise_accuracy = 84.7 -Alexnet2.validation_accuracy = 85.15 - -bench_tuner_data["alexnet2_cifar10"] = Alexnet2 - - - -Alexnet3 = Benchmark() -Alexnet3.tuner_binary = "vgg16_cifar10_tuner" -Alexnet3.fp16_binary = "vgg16_cifar10_half" -Alexnet3.promise_binary = "./vgg16_cifar10_promise" -Alexnet3.validation_binary = "vgg16_cifar10_valid" -Alexnet3.num_flags = 50 -Alexnet3.num_layers = 15 -Alexnet3.error_range_1 = 9 -Alexnet3.error_range_2 = 11 -Alexnet3.start_promise_range = 1 -#Alexnet3.skip_layer_str = "0" -Alexnet3.skip_layer_str = "14_3_4_1_6" - -Alexnet3.base_dir = "../build_tuner/tuner_results/vgg16_cifar10/" -Alexnet3.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar10/loss_1/" + batch_id -Alexnet3.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar10/loss_2/" + batch_id -Alexnet3.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar10/loss_3/" + batch_id - -Alexnet3.tensor_desc_file = "tuner_results/vgg16_cifar10/vgg16_tensors.txt" -Alexnet3.layer_file = "tuner_results/vgg16_cifar10/vgg16_layers.txt" -Alexnet3.cost_file = "../build_tuner/tuner_results/vgg16_cifar10/op_cost.txt" -Alexnet3.layer_knobs = "../opentuner/data/vgg16_cifar10/knobs.txt" - -Alexnet3.loss1_result_file = "tuner_results/vgg16_cifar10/loss_1/promise_tuned_confs/promise_confs.txt" -Alexnet3.loss2_result_file = "tuner_results/vgg16_cifar10/loss_2/promise_tuned_confs/promise_confs.txt" - -Alexnet3.autotuner_runs = 1000 -Alexnet3.tuner_accuracy = 90.19 -#Alexnet3.promise_accuracy = 90.19 -Alexnet3.promise_accuracy = 88.53 -Alexnet3.validation_accuracy = 89.05 - -bench_tuner_data["vgg16_cifar10"] = Alexnet3 - - - -Alexnet4 = Benchmark() -Alexnet4.tuner_binary = "resnet18_cifar10_tuner" -Alexnet4.fp16_binary = "resnet18_half" -Alexnet4.promise_binary = 
"resnet18_promise" -Alexnet4.validation_binary = "resnet18_valid" -Alexnet4.num_flags = 73 -Alexnet4.num_layers = 22 -Alexnet4.error_range_1 = 7 -Alexnet4.error_range_2 = 9 -Alexnet4.start_promise_range = 1 -#Alexnet4.skip_layer_str = "0" -Alexnet4.skip_layer_str = "0_1_2_14_15_17_18_21" -Alexnet4.base_dir = "../build_tuner/tuner_results/resnet18_cifar10/" -Alexnet4.result_dir_1 = "../build_tuner/tuner_results/resnet18_cifar10/loss_1/" + batch_id -Alexnet4.result_dir_2 = "../build_tuner/tuner_results/resnet18_cifar10/loss_2/" + batch_id -Alexnet4.result_dir_3 = "../build_tuner/tuner_results/resnet18_cifar10/loss_3/" + batch_id -Alexnet4.tensor_desc_file = "tuner_results/resnet18_cifar10/resnet_tensors.txt" -Alexnet4.layer_file = "tuner_results/resnet18_cifar10/resnet_layers.txt" -Alexnet4.cost_file = "../build_tuner/tuner_results/resnet18_cifar10/op_cost.txt" -Alexnet4.layer_knobs = "../opentuner/data/resnet/knobs.txt" - -Alexnet4.loss1_result_file = "tuner_results/resnet18_cifar10/loss_1/promise_tuned_confs/promise_confs.txt" -Alexnet4.loss2_result_file = "tuner_results/resnet18_cifar10/loss_2/promise_tuned_confs/promise_confs.txt" - -Alexnet4.autotuner_runs = 1500 -Alexnet4.tuner_accuracy = 89.6 -#Alexnet4.promise_accuracy = 89.59 - 1000 images -Alexnet4.promise_accuracy = 89.5 -Alexnet4.validation_accuracy = 89.65 - -bench_tuner_data["resnet18_cifar10"] = Alexnet4 - - - - - -Alexnet5 = Benchmark() -Alexnet5.tuner_binary = "vgg16_cifar100_tuner" -Alexnet5.fp16_binary = "vgg16_cifar100_half" -Alexnet5.promise_binary = "vgg16_cifar100_promise" -Alexnet5.validation_binary = "vgg16_cifar100_valid" -Alexnet5.num_flags = 50 -Alexnet5.num_layers = 15 -Alexnet5.error_range_1 = 9 -Alexnet5.error_range_2 = 11 -Alexnet5.start_promise_range = 1 -Alexnet5.skip_layer_str = "0_1_2_3_4" -Alexnet5.base_dir = "../build_tuner/tuner_results/vgg16_cifar100/" -Alexnet5.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar100/loss_1/" + batch_id -Alexnet5.result_dir_2 = 
"../build_tuner/tuner_results/vgg16_cifar100/loss_2/" + batch_id -Alexnet5.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar100/loss_3/" + batch_id - -Alexnet5.tensor_desc_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_tensors.txt" -Alexnet5.layer_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_layers.txt" -Alexnet5.cost_file = "../build_tuner/tuner_results/vgg16_cifar100/op_cost.txt" -Alexnet5.layer_knobs = "../opentuner/data/vgg16_cifar100/knobs.txt" - -Alexnet5.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -Alexnet5.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Alexnet5.autotuner_runs = 1000 -Alexnet5.tuner_accuracy = 67.95 -#Alexnet5.promise_accuracy = 66.8 -Alexnet5.promise_accuracy = 67.86 -Alexnet5.validation_accuracy = 68.65 - -bench_tuner_data["vgg16_cifar100"] = Alexnet5 - - - -Alexnet6 = Benchmark() -Alexnet6.tuner_binary = "lenet_keras" -Alexnet6.fp16_binary = "lenet_half" -Alexnet6.promise_binary = "lenet_promise" -Alexnet6.validation_binary = "lenet_promise" - -Alexnet6.num_flags = 14 -Alexnet6.num_layers = 4 -Alexnet6.error_range_1 = 16 -Alexnet6.error_range_2 = 20 -Alexnet6.start_promise_range = 1 -Alexnet6.skip_layer_str = "0" - -Alexnet6.base_dir = "../build_tuner/tuner_results/lenet_keras/" -Alexnet6.result_dir = "../build_tuner/tuner_results/lenet_keras/loss_123/" + batch_id -Alexnet6.result_dir_1 = "../build_tuner/tuner_results/lenet_keras/loss_1/" + batch_id -Alexnet6.result_dir_2 = "../build_tuner/tuner_results/lenet_keras/loss_2/" + batch_id -Alexnet6.result_dir_3 = "../build_tuner/tuner_results/lenet_keras/loss_3/" + batch_id - -Alexnet6.tensor_desc_file = "tuner_results/lenet_keras/lenet_tensors.txt" -Alexnet6.layer_file = "tuner_results/lenet_keras/lenet_layers.txt" -Alexnet6.cost_file = "../build_tuner/tuner_results/lenet_keras/op_cost.txt" -Alexnet6.layer_knobs = "../autotuner/data/lenet/knobs.txt" - 
-#Alexnet6.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Alexnet6.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Alexnet6.autotuner_runs = 1000 -Alexnet6.tuner_accuracy = 98.9 -Alexnet6.promise_accuracy = 99.7 -Alexnet6.validation_accuracy = 99 - -bench_tuner_data["lenet_keras"] = Alexnet6 - - - - -Alexnet7 = Benchmark() -Alexnet7.tuner_binary = "mobilenet_cifar10" -Alexnet7.fp16_binary = "mobilenet_half" -Alexnet7.promise_binary = "mobilenet_promise" -Alexnet7.validation_binary = "mobilenet_valid" -Alexnet7.num_flags = 85 -Alexnet7.num_layers = 15 -Alexnet7.error_range_1 = 7 -Alexnet7.error_range_2 = 8 -Alexnet7.start_promise_range = 1 -#Alexnet7.skip_layer_str = "0" -Alexnet7.skip_layer_str = "1_14_0_6_2" -Alexnet7.base_dir = "../build_tuner/tuner_results/mobilenet/" -Alexnet7.result_dir_1 = "../build_tuner/tuner_results/mobilenet/loss_1/" + batch_id -Alexnet7.result_dir_2 = "../build_tuner/tuner_results/mobilenet/loss_2/" + batch_id -Alexnet7.result_dir_3 = "../build_tuner/tuner_results/mobilenet/loss_3/" + batch_id - -Alexnet7.tensor_desc_file = "tuner_results/mobilenet/mobilenet_ops.txt" -Alexnet7.layer_file = "tuner_results/mobilenet/mobilenet_layer_comp.txt" -Alexnet7.cost_file = "../build_tuner/tuner_results/mobilenet/op_cost.txt" -Alexnet7.layer_knobs = "../opentuner/data/mobilenet/knobs.txt" - -#--- Files below needed for VALIDATION experiment -Alexnet7.loss1_result_file = "tuner_results/mobilenet/loss_1/batch1/promise_tuner/high_confidence/promise_confs.txt" -Alexnet7.loss2_result_file = "tuner_results/mobilenet/loss_2/batch1/promise_tuner/high_confidence/promise_confs.txt" -Alexnet7.autotuner_runs = 1000 -Alexnet7.tuner_accuracy = 84.8 -#Alexnet7.promise_accuracy = 84.8 -Alexnet7.promise_accuracy = 83.73 -Alexnet7.validation_accuracy = 84.4 - -bench_tuner_data["mobilenet_cifar10"] = Alexnet7 - - - -Alexnet8 = Benchmark() -Alexnet8.tuner_binary = 
"mobilenet_cifar10_shallow" -Alexnet8.fp16_binary = "mobilenet_shallow_half" -Alexnet8.promise_binary = "mobilenet_shallow_promise" -Alexnet8.validation_binary = "mobilenet_shallow_valid" -Alexnet8.num_flags = 42 -Alexnet8.num_layers = 8 -Alexnet8.error_range_1 = 10 -Alexnet8.error_range_2 = 12 -Alexnet8.start_promise_range = 1 -#Alexnet8.skip_layer_str = "0" -Alexnet8.skip_layer_str = "7_0_1" -Alexnet8.base_dir = "../build_tuner/tuner_results/mobilenet_shallow/" -Alexnet8.result_dir_1 = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/" + batch_id -Alexnet8.result_dir_2 = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/" + batch_id -Alexnet8.result_dir_3 = "../build_tuner/tuner_results/mobilenet_shallow/loss_3/" + batch_id - -Alexnet8.tensor_desc_file = "../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_ops.txt" -Alexnet8.layer_file = "../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_layer_comp.txt" -Alexnet8.cost_file = "../build_tuner/tuner_results/mobilenet_shallow/op_cost.txt" -Alexnet8.layer_knobs = "../opentuner/data/mobilenet_shallow/knobs.txt" - -Alexnet8.loss1_result_file = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/batch2/promise_tuner/high_confidence/promise_selected_confs.txt" -Alexnet8.loss2_result_file = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/batch2/promise_tuner/high_confidence/promise_selected_confs.txt" - -Alexnet8.autotuner_runs = 1000 -Alexnet8.tuner_accuracy = 87.6 -#Alexnet8.promise_accuracy = 87.59 -Alexnet8.promise_accuracy = 87.76 -Alexnet8.validation_accuracy = 88.5 - -bench_tuner_data["mobilenet_shallow"] = Alexnet8 - - - -""" -Alexnet9 = Benchmark() -Alexnet9.tuner_binary = "fc4_clipped" -Alexnet9.promise_binary = "" -Alexnet9.validation_binary = "" -Alexnet9.num_flags = 12 -Alexnet9.num_layers = 4 -Alexnet9.error_range_1 = 12 -Alexnet9.error_range_2 = 16 -Alexnet9.start_promise_range = 3 -Alexnet9.skip_layer_str = "0" -Alexnet9.base_dir = 
"../build_tuner/tuner_results/fc4/" -Alexnet9.result_dir_1 = "../build_tuner/tuner_results/fc4/loss1/batch1" -Alexnet9.result_dir_2 = "../build_tuner/tuner_results/fc4/loss2/batch1" -Alexnet9.tensor_desc_file = "" -Alexnet9.layer_file = "" - -Alexnet9.loss1_result_file = "" -Alexnet9.loss2_result_file = "" - -Alexnet9.autotuner_runs = 1000 -Alexnet9.tuner_accuracy = 93.8 -Alexnet9.promise_accuracy = 0.0 -Alexnet9.validation_accuracy = 0.0 - -bench_tuner_data["fc4"] = Alexnet9 - - - - -Pipeline1 = Benchmark() -Pipeline1.tuner_binary = "pipeline_GEOM" -Pipeline1.promise_binary = "pipeline_GEOM_promise" -Pipeline1.validation_binary = "pipeline_GEOM_valid" -Pipeline1.num_flags = 9 -Pipeline1.num_layers = 4 -Pipeline1.error_range_1 = 10 -Pipeline1.error_range_2 = 15 -Pipeline1.start_promise_range = 2 -Pipeline1.skip_layer_str = "1_2" -Pipeline1.result_dir_1 = "tuner_results/pipeline_GEOM/loss_30/batch1" -Pipeline1.result_dir_2 = "tuner_results/pipeline_GEOM/loss_20/batch1" -Pipeline1.tensor_desc_file = "tuner_results/pipeline_GEOM/pipeline_GEOM_tensors.txt" -Pipeline1.layer_file = "tuner_results/pipeline_GEOM/pipeline_GEOM_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline1.autotuner_runs = 300 -Pipeline1.tuner_accuracy = 95 -Pipeline1.promise_accuracy = 95 -Pipeline1.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEOM"] = Pipeline1 - - -Pipeline2 = Benchmark() -Pipeline2.tuner_binary = "pipeline_GEMO" -Pipeline2.promise_binary = "pipeline_GEMO_promise" -Pipeline2.validation_binary = "pipeline_GEMO_valid" -Pipeline2.num_flags = 9 -Pipeline2.num_layers = 4 -Pipeline2.error_range_1 = 10 -Pipeline2.error_range_2 = 15 -Pipeline2.start_promise_range = 2 -Pipeline2.skip_layer_str = "1_3" -Pipeline2.result_dir_1 = "tuner_results/pipeline_GEMO/loss_30/batch1" -Pipeline2.result_dir_2 = 
"tuner_results/pipeline_GEMO/loss_20/batch1" -Pipeline2.tensor_desc_file = "tuner_results/pipeline_GEMO/pipeline_GEMO_tensors.txt" -Pipeline2.layer_file = "tuner_results/pipeline_GEMO/pipeline_GEMO_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline2.autotuner_runs = 300 -Pipeline2.tuner_accuracy = 95 -Pipeline2.promise_accuracy = 95 -Pipeline2.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEMO"] = Pipeline2 - - - - -Pipeline3 = Benchmark() -Pipeline3.tuner_binary = "pipeline_GSME" -Pipeline3.promise_binary = "pipeline_GSME_promise" -Pipeline3.validation_binary = "pipeline_GSME_valid" -Pipeline3.num_flags = 9 -Pipeline3.num_layers = 4 -Pipeline3.error_range_1 = 10 -Pipeline3.error_range_2 = 15 -Pipeline3.start_promise_range = 2 -Pipeline3.skip_layer_str = "1_3" -Pipeline3.result_dir_1 = "tuner_results/pipeline_GSME/loss_30/batch1" -Pipeline3.result_dir_2 = "tuner_results/pipeline_GSME/loss_20/batch1" -Pipeline3.tensor_desc_file = "tuner_results/pipeline_GSME/pipeline_GSME_tensors.txt" -Pipeline3.layer_file = "tuner_results/pipeline_GSME/pipeline_GSME_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline3.autotuner_runs = 300 -Pipeline3.tuner_accuracy = 95 -Pipeline3.promise_accuracy = 95 -Pipeline3.validation_accuracy = 95 - -bench_tuner_data["pipeline_GSME"] = Pipeline3 - - -Pipeline4 = Benchmark() -Pipeline4.tuner_binary = "pipeline_GEO" -Pipeline4.promise_binary = "pipeline_GEO_promise" -Pipeline4.validation_binary = "pipeline_GEO_valid" -Pipeline4.num_flags = 7 -Pipeline4.num_layers = 3 -Pipeline4.error_range_1 = 10 -Pipeline4.error_range_2 = 15 -Pipeline4.start_promise_range = 2 
-Pipeline4.skip_layer_str = "1_2" -Pipeline4.result_dir_1 = "tuner_results/pipeline_GEO/loss_30/batch1" -Pipeline4.result_dir_2 = "tuner_results/pipeline_GEO/loss_20/batch1" -Pipeline4.tensor_desc_file = "tuner_results/pipeline_GEO/pipeline_GEO_tensors.txt" -Pipeline4.layer_file = "tuner_results/pipeline_GEO/pipeline_GEO_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline4.autotuner_runs = 300 -Pipeline4.tuner_accuracy = 95 -Pipeline4.promise_accuracy = 95 -Pipeline4.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEO"] = Pipeline4 - - -Pipeline5 = Benchmark() -Pipeline5.tuner_binary = "pipeline_GSM" -Pipeline5.promise_binary = "pipeline_GSM_promise" -Pipeline5.validation_binary = "pipeline_GSM_valid" -Pipeline5.num_flags = 6 -Pipeline5.num_layers = 3 -Pipeline5.error_range_1 = 10 -Pipeline5.error_range_2 = 15 -Pipeline5.start_promise_range = 2 -Pipeline5.skip_layer_str = "1_1" -Pipeline5.result_dir_1 = "tuner_results/pipeline_GSM/loss_30/batch1" -Pipeline5.result_dir_2 = "tuner_results/pipeline_GSM/loss_20/batch1" -Pipeline5.tensor_desc_file = "tuner_results/pipeline_GSM/pipeline_GSM_tensors.txt" -Pipeline5.layer_file = "tuner_results/pipeline_GSM/pipeline_GSM_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline5.autotuner_runs = 300 -Pipeline5.tuner_accuracy = 95 -Pipeline5.promise_accuracy = 95 -Pipeline5.validation_accuracy = 95 - -bench_tuner_data["pipeline_GSM"] = Pipeline5 - -""" - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/benchmarks.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/benchmarks.py deleted file mode 100644 index 
0662ddaa76e359c3d3b1d911d17d01394aaab654..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/benchmarks.py +++ /dev/null @@ -1,599 +0,0 @@ - -# NOTES: Batch13 (Baseline) Batch14 - With ErrorSens (10, 25, 35) - -# Batch 9: No Error Sens. Min : P3 -# Batch 10: No Error Sens + More Runs for Loss1 and Loss2. Min: P3 -# Batch 11: Error Sens: Skipping 30% elems in each : Min: P3. More runs in Loss1 (4000) and Loss2 (2000) -# Batch 12: Error Sens: 10, 25, 35, for Loss1, 2, 3, respectively, Min: P3. 1000 Runs for All -# Batch 13: No Error Sens: Equal Runs (1000) for all. Min: P1 -# Batch 14: Reruning Batch12 with bugFix! -# Batch 15: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3 - -# Batch 18: Batch13 (Basline) + ParetoCurve (1500 Runs) - BUGGY IGNORE!!! - -# Batch 19: (Basline) + ParetoCurve + 2 runs in Tuning Phase (1500 Runs) - -# Batch 20: 3-Skip levels + + 2 runs + 1500 Runs + EnergyBandSize now % of Max (Compare against Batch19 - - -# Batch 200: AlgoTuner - 1000 images - 1500 runs (IGNORE) -# Batch 201: AlgoTuner - 2000 images - 1500 runs -# Batch 202: AlgoTuner - 2000 images - 500 runs -# Batch 203: AlgoTuner - 2000 images - 3000 runs - - -#---- CHANGES: i) Reshufled inputs ii) 3K images for tuning -# Batch 210: 3K images, 1000 runs (1500 resnet), no FP32 used in tuning -# Batch 211: Same as Batch-210 + uses tensorConvPerfCuda*Half* - - -#batch_id = "batch210" -#batch_id = "batch211" -#batch_id = "batch210" - -# NOTE: Testing new devtuner script -#batch_id = "batch311" - -# NOTE: batch with 3K runs each - new devtuner script -#batch_id = "batch312" - - -# NOTE: Trying out piped execution -#batch_id = "batch313" - -# NOTE: Trying out piped execution with 3K each - to measure time is the goal -#batch_id = "batch314" - -# NOTE: Trying out VGG16 Imagenet with new error slack approach -# batch_id = "batch_315" - -# NOTE: Using Batch with 2K images in VGG16_imagenet -#-- batch_id = "batch316" - -# 
Running all non imagenet DNNs for Yifan - ENDED UP TESTING RUN -#-- batch_id = "batch321" - - -# Running all non imagenet DNNs for Yifan - Long Running -#--- batch_id = "batch322" - - -# Re-Running VGG16_imagenet after issues with python setup -#-- batch_id = "batch323" - - -# Re-Running all CIFAR-10 benchmarks after using AUTOMATIC KNOBS -#-- batch_id = "batch324" - -# After Fixing Yasmin's code first batch of runs on CIFAR-10 DNNs -# NOTE: First batch with 33% sampling - 2K runs for each threshold -#-- batch_id = "batch325" - - -# After Fixing Yasmin's code second batch of runs on CIFAR-10 DNNs -# NOTE: Second batch with 33% sampling - 5K runs for each threshold -# NOTE: First batch with dumping CPU runtime configs -#-- batch_id = "batch327" - - -# IMP: Increased SAMPLING Knobs ---- Adding interpolation-based Knobs - 8K iterations -#---- batch_id = "batch328" - - -# IMP: Increased SAMPLING Knobs ---- Adding interpolation-based Knobs -- 12K -#-- batch_id = "batch329" - -# IMP: Increased SAMPLING Knobs ---- Adding interpolation-based Knobs -- 12K - NEW: 5K images calibration set -#-- batch_id = "batch330" - - -# IMP: Increased SAMPLING Knobs -- 20K iterations - NEW: 5K images calibration set -- fixed bugs -#-- batch_id = "batch331" - -# testing install-time tuner -#batch_id = "batch340" - -# First run of install time tuner -batch_id = "batch341" - -# Install Timer Tuner with FP32 SEEDING -batch_id = "batch342" - -# Install Timer Tuner with FP32 SEEDING -# FIRST time reducing sampling knobs 239 above -# Fixed bugs --- added FP32 to search space -batch_id = "batch343" - - -# testing pareto-only validation -batch_id = "batch344" - - -# First Install-time tuning run with different iterations per DNN benchmark -batch_id = "batch345" - - -# First Install-time tuning run with 10K iterations per DNN benchmark -batch_id = "batch346" - - - -class Benchmark: - def __init__(self): - self.tuner_binary = "" - self.promise_binary = "" - self.tuner_accuracy = 0 - 
self.promise_accuracy = 0 - self.num_flags = 0 - self.num_layers = 0 - self.autotuner_runs = 0 - self.error_range_1 = 0 - self.error_range_2 = 0 - self.result_dir_1 = "" - self.result_dir_2 = "" - self.promise_result_dir_1 = "" - self.promise_result_dir_2 = "" - - - -bench_tuner_data = {} - - -LeNet = Benchmark() -LeNet.tuner_binary = "lenet_keras" -LeNet.fp16_binary = "lenet_half" -LeNet.promise_binary = "lenet_promise" -LeNet.piped_binary = "lenet_piped" -LeNet.validation_binary = "lenet_promise" - -LeNet.num_flags = 14 -LeNet.num_layers = 4 -LeNet.error_range_1 = 16 -LeNet.error_range_2 = 20 -LeNet.start_promise_range = 1 -LeNet.skip_layer_str = "0" - -LeNet.base_dir = "tuner_results/lenet_keras/" - -LeNet.tensor_desc_file = "autotuner/data/lenet/lenet_tensors.txt" -LeNet.layer_file = "autotuner/data/lenet/lenet_layers.txt" -LeNet.cost_file = "autotuner/data/lenet/op_cost.txt" -LeNet.layer_knobs = "autotuner/data/lenet/dev_knobs.txt" - -LeNet.autotuner_runs = 2000 -LeNet.tuner_accuracy = 98.9 -LeNet.promise_accuracy = 99.7 -LeNet.validation_accuracy = 99 - -bench_tuner_data["lenet_keras"] = LeNet - - - - - - -# FIXIT: Fix Variable Names below -Alexnet = Benchmark() -Alexnet.tuner_binary = "alexnet_cifar10_tuner" -Alexnet.fp16_binary = "alexnet_half" -Alexnet.promise_binary = "alexnet_promise" -Alexnet.piped_binary = "alexnet_piped" -Alexnet.validation_binary = "alexnet_valid" -Alexnet.num_flags = 21 -Alexnet.num_layers = 6 -Alexnet.error_range_1 = 10 -Alexnet.error_range_2 = 13 -Alexnet.start_promise_range = 1 -Alexnet.skip_layers = 0 -Alexnet.skip_layer_str = "5_0" - -Alexnet.base_dir = "tuner_results/alexnet_cifar10/" - -Alexnet.tensor_desc_file = "autotuner/data/alexnet/alexnet_tensors.txt" -Alexnet.layer_file = "autotuner/data/alexnet/alexnet_layers.txt" -Alexnet.cost_file = "autotuner/data/alexnet/op_cost.txt" -Alexnet.layer_knobs = "autotuner/data/alexnet/dev_knobs.txt" - -Alexnet.autotuner_runs = 4000 -Alexnet.tuner_accuracy = 79.9 
-Alexnet.promise_accuracy = 78.86 -Alexnet.validation_accuracy = 79.19 - -bench_tuner_data["alexnet_cifar10"] = Alexnet - - -Alexnet2 = Benchmark() -Alexnet2.tuner_binary = "alexnet2_cifar10_tuner" -Alexnet2.fp16_binary = "alexnet2_half" -Alexnet2.promise_binary = "alexnet2_promise" -Alexnet2.piped_binary = "alexnet2_piped" -Alexnet2.validation_binary = "alexnet2_valid" -Alexnet2.num_flags = 23 -Alexnet2.num_layers = 7 -Alexnet2.error_range_1 = 10 -Alexnet2.error_range_2 = 13 -Alexnet2.start_promise_range = 1 -Alexnet2.skip_layer_str = "6_1_0" - -Alexnet2.base_dir = "tuner_results/alexnet2_cifar10/" - -Alexnet2.tensor_desc_file = "autotuner/data/alexnet2/alexnet2_tensors.txt" -Alexnet2.layer_file = "autotuner/data/alexnet2/alexnet2_layers.txt" -Alexnet2.cost_file = "autotuner/data/alexnet2/op_cost.txt" -Alexnet2.layer_knobs = "autotuner/data/alexnet2/dev_knobs.txt" -Alexnet2.autotuner_runs = 4000 -Alexnet2.tuner_accuracy = 84.19 -Alexnet2.promise_accuracy = 84.7 -Alexnet2.validation_accuracy = 85.15 - -bench_tuner_data["alexnet2_cifar10"] = Alexnet2 - - - -VGG16_10 = Benchmark() -VGG16_10.tuner_binary = "vgg16_cifar10_tuner" -VGG16_10.fp16_binary = "vgg16_cifar10_half" -VGG16_10.promise_binary = "./vgg16_cifar10_promise" -VGG16_10.piped_binary = "./vgg16_cifar10_piped" -VGG16_10.validation_binary = "vgg16_cifar10_valid" -VGG16_10.num_flags = 50 -VGG16_10.num_layers = 15 -VGG16_10.error_range_1 = 9 -VGG16_10.error_range_2 = 11 -VGG16_10.start_promise_range = 1 -VGG16_10.skip_layer_str = "14_3_4_1_6" - -VGG16_10.base_dir = "tuner_results/vgg16_cifar10/" - -VGG16_10.tensor_desc_file = "autotuner/data/vgg16_cifar10/vgg16_tensors.txt" -VGG16_10.layer_file = "autotuner/data/vgg16_cifar10/vgg16_layers.txt" -VGG16_10.cost_file = "autotuner/data/vgg16_cifar10/op_cost.txt" -VGG16_10.layer_knobs = "autotuner/data/vgg16_cifar10/dev_knobs.txt" - -VGG16_10.autotuner_runs = 8000 -VGG16_10.tuner_accuracy = 90.19 - -VGG16_10.promise_accuracy = 88.53 -VGG16_10.validation_accuracy = 
89.05 - -bench_tuner_data["vgg16_cifar10"] = VGG16_10 - - - - -VGG16_100 = Benchmark() -VGG16_100.tuner_binary = "vgg16_cifar100_tuner" -VGG16_100.fp16_binary = "vgg16_cifar100_half" -VGG16_100.promise_binary = "vgg16_cifar100_promise" -VGG16_100.piped_binary = "vgg16_cifar100_piped" -VGG16_100.validation_binary = "vgg16_cifar100_valid" -VGG16_100.num_flags = 50 -VGG16_100.num_layers = 15 -VGG16_100.error_range_1 = 9 -VGG16_100.error_range_2 = 11 -VGG16_100.start_promise_range = 1 -VGG16_100.skip_layer_str = "0_1_2_3_4" - -VGG16_100.base_dir = "tuner_results/vgg16_cifar100/" - -VGG16_100.tensor_desc_file = "autotuner/data/vgg16_cifar100/vgg16_tensors.txt" -VGG16_100.layer_file = "autotuner/data/vgg16_cifar100/vgg16_layers.txt" -VGG16_100.cost_file = "autotuner/data/vgg16_cifar100/op_cost.txt" -VGG16_100.layer_knobs = "autotuner/data/vgg16_cifar100/dev_knobs.txt" - -VGG16_100.autotuner_runs = 5000 -VGG16_100.tuner_accuracy = 67.95 - -VGG16_100.promise_accuracy = 67.86 -VGG16_100.validation_accuracy = 68.65 - -bench_tuner_data["vgg16_cifar100"] = VGG16_100 - - - - -VGG16_imagenet = Benchmark() -VGG16_imagenet.tuner_binary = "" -VGG16_imagenet.fp16_binary = "" -VGG16_imagenet.promise_binary = "vgg16_imagenet_promise" -VGG16_imagenet.piped_binary = "vgg16_imagenet_piped" -VGG16_imagenet.validation_binary = "vgg16_imagenet_promise" -VGG16_imagenet.num_flags = 53 -VGG16_imagenet.num_layers = 16 - -VGG16_imagenet.base_dir = "tuner_results/vgg16_imagenet/" -VGG16_imagenet.tensor_desc_file = "autotuner/data/vgg16_imagenet/vgg16_tensors.txt" -VGG16_imagenet.layer_file = "autotuner/data/vgg16_imagenet/vgg16_layers.txt" -VGG16_imagenet.cost_file = "autotuner/data/vgg16_imagenet/op_cost.txt" -VGG16_imagenet.layer_knobs = "autotuner/data/vgg16_imagenet/dev_knobs.txt" - -VGG16_imagenet.autotuner_runs = 5000 -VGG16_imagenet.tuner_accuracy = 0.0 -VGG16_imagenet.promise_accuracy = 69.62 -VGG16_imagenet.validation_accuracy = 69.62 - -#-- bench_tuner_data["vgg16_imagenet"] = 
VGG16_imagenet - - - - -ResNet = Benchmark() -ResNet.tuner_binary = "resnet18_cifar10_tuner" -ResNet.fp16_binary = "resnet18_half" -ResNet.promise_binary = "resnet18_promise" -ResNet.piped_binary = "resnet18_piped" -ResNet.validation_binary = "resnet18_valid" -ResNet.num_flags = 73 -ResNet.num_layers = 22 -ResNet.error_range_1 = 7 -ResNet.error_range_2 = 9 -ResNet.start_promise_range = 1 - -ResNet.skip_layer_str = "0_1_2_14_15_17_18_21" -ResNet.base_dir = "tuner_results/resnet18_cifar10/" - -ResNet.tensor_desc_file = "autotuner/data/resnet/resnet_tensors.txt" -ResNet.layer_file = "autotuner/data/resnet/resnet_layers.txt" -ResNet.cost_file = "autotuner/data/resnet/op_cost.txt" -ResNet.layer_knobs = "autotuner/data/resnet/dev_knobs.txt" - -ResNet.autotuner_runs = 8000 -ResNet.tuner_accuracy = 89.6 - -ResNet.promise_accuracy = 89.5 -ResNet.validation_accuracy = 89.65 - -bench_tuner_data["resnet18_cifar10"] = ResNet - - - - - - -ResNet50 = Benchmark() -ResNet50.tuner_binary = "" -ResNet50.fp16_binary = "" -ResNet50.promise_binary = "resnet50_imagenet_promise" -ResNet50.piped_binary = "resnet50_imagenet_piped" -ResNet50.validation_binary = "resnet50_valid" -ResNet50.num_flags = 1 # FIXIT -ResNet50.num_layers = 54 - -ResNet50.base_dir = "tuner_results/resnet50_imagenet/" - -ResNet50.tensor_desc_file = "autotuner/data/resnet50_imagenet/resnet50_tensors.txt" -ResNet50.layer_file = "autotuner/data/resnet50_imagenet/resnet50_layers.txt" -ResNet50.cost_file = "autotuner/data/resnet50_imagenet/op_cost.txt" -ResNet50.layer_knobs = "autotuner/data/resnet50_imagenet/dev_knobs.txt" - -ResNet50.autotuner_runs = 5000 -ResNet50.tuner_accuracy = 89.6 - -ResNet50.promise_accuracy = 77 -ResNet50.validation_accuracy = 20 # FIXIT - -#--- bench_tuner_data["resnet50_imagenet"] = ResNet50 - - - - - - - - - -MobileNet = Benchmark() -MobileNet.tuner_binary = "mobilenet_cifar10" -MobileNet.fp16_binary = "mobilenet_half" -MobileNet.promise_binary = "mobilenet_promise" -MobileNet.piped_binary = 
"mobilenet_piped" -MobileNet.validation_binary = "mobilenet_valid" -MobileNet.num_flags = 85 -MobileNet.num_layers = 15 -MobileNet.error_range_1 = 7 -MobileNet.error_range_2 = 8 -MobileNet.start_promise_range = 1 - -MobileNet.skip_layer_str = "1_14_0_6_2" -MobileNet.base_dir = "tuner_results/mobilenet/" - -MobileNet.tensor_desc_file = "autotuner/data/mobilenet/mobilenet_ops.txt" -MobileNet.layer_file = "autotuner/data/mobilenet/mobilenet_layer_comp.txt" -MobileNet.cost_file = "autotuner/data/mobilenet/op_cost.txt" -MobileNet.layer_knobs = "autotuner/data/mobilenet/dev_knobs.txt" - -MobileNet.autotuner_runs = 8000 -MobileNet.tuner_accuracy = 84.8 - -MobileNet.promise_accuracy = 83.73 -MobileNet.validation_accuracy = 84.4 - -bench_tuner_data["mobilenet_cifar10"] = MobileNet - - - -MobileNet_SH = Benchmark() -MobileNet_SH.tuner_binary = "mobilenet_cifar10_shallow" -MobileNet_SH.fp16_binary = "mobilenet_shallow_half" -MobileNet_SH.promise_binary = "mobilenet_shallow_promise" -MobileNet_SH.piped_binary = "mobilenet_shallow_piped" -MobileNet_SH.validation_binary = "mobilenet_shallow_valid" -MobileNet_SH.num_flags = 42 -MobileNet_SH.num_layers = 8 -MobileNet_SH.error_range_1 = 10 -MobileNet_SH.error_range_2 = 12 -MobileNet_SH.start_promise_range = 1 - -MobileNet_SH.skip_layer_str = "7_0_1" -MobileNet_SH.base_dir = "tuner_results/mobilenet_shallow/" - -MobileNet_SH.tensor_desc_file = "autotuner/data/mobilenet_shallow/mobilenet_shallow_ops.txt" -MobileNet_SH.layer_file = "autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt" -MobileNet_SH.cost_file = "autotuner/data/mobilenet_shallow/op_cost.txt" -MobileNet_SH.layer_knobs = "autotuner/data/mobilenet_shallow/dev_knobs.txt" - - -MobileNet_SH.autotuner_runs = 1000 -MobileNet_SH.tuner_accuracy = 87.6 - -MobileNet_SH.promise_accuracy = 87.76 -MobileNet_SH.validation_accuracy = 88.5 - -#-- bench_tuner_data["mobilenet_shallow"] = MobileNet_SH - - - -""" -Alexnet9 = Benchmark() -FC4.tuner_binary = "fc4_clipped" 
-FC4.promise_binary = "" -FC4.validation_binary = "" -FC4.num_flags = 12 -FC4.num_layers = 4 -FC4.error_range_1 = 12 -FC4.error_range_2 = 16 -FC4.start_promise_range = 3 -FC4.skip_layer_str = "0" -FC4.base_dir = "../build_tuner/tuner_results/fc4/" -FC4.result_dir_1 = "../build_tuner/tuner_results/fc4/loss1/batch1" -FC4.result_dir_2 = "../build_tuner/tuner_results/fc4/loss2/batch1" -FC4.tensor_desc_file = "" -FC4.layer_file = "" - -FC4.loss1_result_file = "" -FC4.loss2_result_file = "" - -FC4.autotuner_runs = 1000 -FC4.tuner_accuracy = 93.8 -FC4.promise_accuracy = 0.0 -FC4.validation_accuracy = 0.0 - -bench_tuner_data["fc4"] = FC4 - - - - -Pipeline1 = Benchmark() -Pipeline1.tuner_binary = "pipeline_GEOM" -Pipeline1.promise_binary = "pipeline_GEOM_promise" -Pipeline1.validation_binary = "pipeline_GEOM_valid" -Pipeline1.num_flags = 9 -Pipeline1.num_layers = 4 -Pipeline1.error_range_1 = 10 -Pipeline1.error_range_2 = 15 -Pipeline1.start_promise_range = 2 -Pipeline1.skip_layer_str = "1_2" -Pipeline1.result_dir_1 = "tuner_results/pipeline_GEOM/loss_30/batch1" -Pipeline1.result_dir_2 = "tuner_results/pipeline_GEOM/loss_20/batch1" -Pipeline1.tensor_desc_file = "tuner_results/pipeline_GEOM/pipeline_GEOM_tensors.txt" -Pipeline1.layer_file = "tuner_results/pipeline_GEOM/pipeline_GEOM_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline1.autotuner_runs = 300 -Pipeline1.tuner_accuracy = 95 -Pipeline1.promise_accuracy = 95 -Pipeline1.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEOM"] = Pipeline1 - - -Pipeline2 = Benchmark() -Pipeline2.tuner_binary = "pipeline_GEMO" -Pipeline2.promise_binary = "pipeline_GEMO_promise" -Pipeline2.validation_binary = "pipeline_GEMO_valid" -Pipeline2.num_flags = 9 -Pipeline2.num_layers = 4 -Pipeline2.error_range_1 = 10 -Pipeline2.error_range_2 = 15 
-Pipeline2.start_promise_range = 2 -Pipeline2.skip_layer_str = "1_3" -Pipeline2.result_dir_1 = "tuner_results/pipeline_GEMO/loss_30/batch1" -Pipeline2.result_dir_2 = "tuner_results/pipeline_GEMO/loss_20/batch1" -Pipeline2.tensor_desc_file = "tuner_results/pipeline_GEMO/pipeline_GEMO_tensors.txt" -Pipeline2.layer_file = "tuner_results/pipeline_GEMO/pipeline_GEMO_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline2.autotuner_runs = 300 -Pipeline2.tuner_accuracy = 95 -Pipeline2.promise_accuracy = 95 -Pipeline2.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEMO"] = Pipeline2 - - - - -Pipeline3 = Benchmark() -Pipeline3.tuner_binary = "pipeline_GSME" -Pipeline3.promise_binary = "pipeline_GSME_promise" -Pipeline3.validation_binary = "pipeline_GSME_valid" -Pipeline3.num_flags = 9 -Pipeline3.num_layers = 4 -Pipeline3.error_range_1 = 10 -Pipeline3.error_range_2 = 15 -Pipeline3.start_promise_range = 2 -Pipeline3.skip_layer_str = "1_3" -Pipeline3.result_dir_1 = "tuner_results/pipeline_GSME/loss_30/batch1" -Pipeline3.result_dir_2 = "tuner_results/pipeline_GSME/loss_20/batch1" -Pipeline3.tensor_desc_file = "tuner_results/pipeline_GSME/pipeline_GSME_tensors.txt" -Pipeline3.layer_file = "tuner_results/pipeline_GSME/pipeline_GSME_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline3.autotuner_runs = 300 -Pipeline3.tuner_accuracy = 95 -Pipeline3.promise_accuracy = 95 -Pipeline3.validation_accuracy = 95 - -bench_tuner_data["pipeline_GSME"] = Pipeline3 - - -Pipeline4 = Benchmark() -Pipeline4.tuner_binary = "pipeline_GEO" -Pipeline4.promise_binary = "pipeline_GEO_promise" -Pipeline4.validation_binary = 
"pipeline_GEO_valid" -Pipeline4.num_flags = 7 -Pipeline4.num_layers = 3 -Pipeline4.error_range_1 = 10 -Pipeline4.error_range_2 = 15 -Pipeline4.start_promise_range = 2 -Pipeline4.skip_layer_str = "1_2" -Pipeline4.result_dir_1 = "tuner_results/pipeline_GEO/loss_30/batch1" -Pipeline4.result_dir_2 = "tuner_results/pipeline_GEO/loss_20/batch1" -Pipeline4.tensor_desc_file = "tuner_results/pipeline_GEO/pipeline_GEO_tensors.txt" -Pipeline4.layer_file = "tuner_results/pipeline_GEO/pipeline_GEO_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline4.autotuner_runs = 300 -Pipeline4.tuner_accuracy = 95 -Pipeline4.promise_accuracy = 95 -Pipeline4.validation_accuracy = 95 - -bench_tuner_data["pipeline_GEO"] = Pipeline4 - - -Pipeline5 = Benchmark() -Pipeline5.tuner_binary = "pipeline_GSM" -Pipeline5.promise_binary = "pipeline_GSM_promise" -Pipeline5.validation_binary = "pipeline_GSM_valid" -Pipeline5.num_flags = 6 -Pipeline5.num_layers = 3 -Pipeline5.error_range_1 = 10 -Pipeline5.error_range_2 = 15 -Pipeline5.start_promise_range = 2 -Pipeline5.skip_layer_str = "1_1" -Pipeline5.result_dir_1 = "tuner_results/pipeline_GSM/loss_30/batch1" -Pipeline5.result_dir_2 = "tuner_results/pipeline_GSM/loss_20/batch1" -Pipeline5.tensor_desc_file = "tuner_results/pipeline_GSM/pipeline_GSM_tensors.txt" -Pipeline5.layer_file = "tuner_results/pipeline_GSM/pipeline_GSM_layers.txt" -#Pipeline1.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_confs/promise_confs.txt" -#Pipeline1.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt" -Pipeline5.autotuner_runs = 300 -Pipeline5.tuner_accuracy = 95 -Pipeline5.promise_accuracy = 95 -Pipeline5.validation_accuracy = 95 - -bench_tuner_data["pipeline_GSM"] = Pipeline5 - -""" - - diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/buildRtConfig.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/buildRtConfig.py deleted file mode 100644 index 5a6a9e0f03a27cac1190a4bf1e93dfd48810ffd9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/buildRtConfig.py +++ /dev/null @@ -1,583 +0,0 @@ - - -import os -import sys -import utils -from benchmarks import bench_tuner_data -from swing_selection import loadLayerDesc -from benchmarks import batch_id - - -op_mapping = {} -op_mapping["conv"] = "conv" -op_mapping["depthwise_conv"] = "group_conv" -op_mapping["dense"] = "mul" -op_mapping["batchnorm"] = "batchnorm" -op_mapping["pool"] = "pool_max" -op_mapping["pool_mean"] = "pool_mean" -op_mapping["activation"] = "relu" -op_mapping["tanh"] = "tanh" -op_mapping["add"] = "add" - - -approx_map = {} - - -def initializeApproxMap(knobs_file_path): - - f = open(knobs_file_path, "r") - - for x in f: - toks = x.split("\t") - approx_type = toks[0].split(",")[0] - knob_id = toks[0].split(",")[1] - approx_str = approx_type + " " + knob_id - approx_map[knob_id] = approx_str - - - print (approx_map) - - - - - - -class Config: - def __init__(self): - self.avg_accuracy = 0 - self.avg_loss = 0 - self.speedup = 1 - self.fname = "" - self.flags = [] - - - - -def isLayer(layer_comp): - if layer_comp[0] == "dense" or layer_comp[0] == "conv": - return True - else: - return False - - - -def getOpMapping(op_name): - - if op_name not in op_mapping: - print ("ERROR: OP not found!! = ", op_name, "\n") - sys.exit(0) - - return op_mapping[op_name] - - - -def getApproxMapping(flag, layer_comp): - - flag_str = str(flag) - if flag_str not in approx_map: - print ("ERROR: OP not found!! 
= ", flag_str, "\n") - sys.exit(0) - - if "dense" in layer_comp and flag > 7: - if flag == 12: - return "fp16 1" - else: - return "fp32 1" - - - return approx_map[flag_str] - - - -def skipFile(fname): - - skip_files = {} - skip_files["confidence_summary.txt"] = 1 - skip_files["promise_confs.txt"] = 1 - - if "accuracy" in fname: # *_accuracy files should be skipped - return True - - if "norms" in fname: # *_accuracy files should be skipped - return True - - if ".#" in fname: # *_accuracy files should be skipped - return True - - #if "_promise" in fname: # *_accuracy files should be skipped - # return True - - if not fname[-1].isdigit(): - return True - - if fname in skip_files: - return True - else: - return False - - - -def parseTopLine(x): - - toks = x.split() - - speedup = 1.0 - accuracy = 0.0 - for tok in toks: - if "avg_accuracy" in tok: - avg_accuracy = float(tok.split("=")[1]) - if "speedup" in tok: - speedup = float(tok.split("=")[1]) - - - return avg_accuracy, speedup - - - -def loadConfigData(result_dir, baseline_accuracy, sub_dir = "high_confidence"): - - config_arr = [] - - #result_dir += "/promise_tuner/high_confidence/" - #result_dir += "/algo_tuner/high_confidence/" - result_dir += "/algo_tuner/" + sub_dir + "/" - file_names = os.listdir(result_dir) - - - for fname in file_names: - if not skipFile(fname): - - fpath = result_dir + fname - config = Config() - f = open(fpath, "r") - - it = 0 - for x in f: - if x.strip == "": - continue - if it == 0: - avg_accuracy, speedup = parseTopLine(x) - config.avg_accuracy = avg_accuracy - config.avg_loss = baseline_accuracy - avg_accuracy - config.speedup = speedup - config.fname = fname - #print ("acc = " + str(avg_accuracy) + "\n") - else: - flag = int(x.strip()) - config.flags.append(flag) - it += 1 - - config_arr.append(config) - - - return config_arr - - - - -def loadConfigsFromDir(result_dir, baseline_accuracy): - - config_arr = [] - file_names = os.listdir(result_dir) - - for fname in file_names: - if not 
skipFile(fname): - - fpath = result_dir + '/' + fname - config = Config() - f = open(fpath, "r") - - it = 0 - for x in f: - if x.strip == "": - continue - if it == 0: - avg_accuracy, speedup = parseTopLine(x) - config.avg_accuracy = avg_accuracy - config.avg_loss = baseline_accuracy - avg_accuracy - config.speedup = speedup - config.fname = fname - #print ("acc = " + str(avg_accuracy) + "\n") - else: - flag = int(x.strip()) - config.flags.append(flag) - it += 1 - - config_arr.append(config) - - - return config_arr - - - - - - - -def loadPromiseConfigs(result_dir, baseline_accuracy, sub_dir = "promise_test"): - - config_arr = [] - result_dir += "/algo_tuner/" + sub_dir + "/" - file_names = os.listdir(result_dir) - - for fname in file_names: - if "_promise" in fname: - - fpath = result_dir + fname - config = Config() - f = open(fpath, "r") - - it = 0 - for x in f: - if x.strip == "": - continue - - if it == 0: - avg_accuracy, speedup = parseTopLine(x) - config.avg_accuracy = avg_accuracy - config.avg_loss = baseline_accuracy - avg_accuracy - config.speedup = speedup - config.fname = fname - #print ("acc = " + str(avg_accuracy) + "\n") - else: - flag = int(x.strip()) - config.flags.append(flag) - - it += 1 - - config_arr.append(config) - - - return config_arr - - - - - -def getFP(flag): - - if flag < 11: - return "fp16" - else: - return "fp32" - - - -def getHardwareTarget(flag): - - if flag <= 7: - return "promise" - else: - return "gpu" - - return "gpu" - - -def handlePromiseConfs(flag, layer_comp): - - approx_tech = getApproxMapping(flag, layer_comp) - config_str = "" - if flag <= 7: - config_str += approx_tech + " " - - return config_str - - -def handleGPUApproxs(flag, layer_comp): - - approx_tech = getApproxMapping(flag, layer_comp) - config_str = "" - if flag > 7: - utils.debug_print ("flag = " + str(flag)) - config_str += getOpMapping(layer_comp[0]) + " " + approx_tech + " " - for op in layer_comp[1:]: - utils.debug_print (layer_comp[1:]) - utils.debug_print 
(op) - - op_name = getOpMapping(op) - config_str += str(op_name) + " " + getFP(flag) + " 1 " - - return config_str - - -def generateBaselineConfig(layer_comp): - - config_str = "" - config_str += "gpu " - for op in layer_comp: - op_name = getOpMapping(op) - config_str += str(op_name) + " fp16 1 " - - return config_str - - - - - -def buildConfigStr(config, layer_desc, hardware_target): - - index = 1 - it = 0 - flags = config.flags - config_str = "" - - for layer_comp in layer_desc: - config_str += str(index) + " " - #-- print ("laye_comp = ", layer_comp) - - if isLayer(layer_comp): - flag = flags[it] - it += 1 - - utils.debug_print ("flag* = " + str(flag)) - # Add Target Target - GPU, PROMISE - #config_str += getHardwareTarget(flag) + " " - - config_str += hardware_target + " " - - utils.debug_print ("config_str = " + str(config_str)) - - config_str += handlePromiseConfs(flag, layer_comp) - config_str += handleGPUApproxs(flag, layer_comp) - - else: # if a non-Layer Operation - config_str += generateBaselineConfig(layer_comp) - - - config_str += "\n" - index += 1 - - - config_str += str(index) + " " + hardware_target + " softmax fp32 1\n" - - return config_str - - - -# Adjusts for expected loss on unseen dataset -def adjustDevTimeLoss(loss): - - # Adjusts for negative and low loss values - if loss < 0.3: - loss += 0.4 - else: - loss = loss * 1.33 # 33% extra error for unseen data - - if loss < 0.0: - loss = 0.1 - - return loss - - - -def adjustConfigLosses(configurations): - - for config in configurations: - config.avg_loss = adjustDevTimeLoss(config.avg_loss) - - - - -def dumpConfig(layer_desc, config_arrs, result_dir): - - f = open(result_dir + "/tuner_pareto_confs_" + batch_id + ".txt", "w+") - it = 1 - for config in config_arrs: - f.write("+++++\n") - f.write("conf" + str(it) + " " + str(config.speedup) + " 0 " + \ - str(config.avg_accuracy) + " " + str(config.avg_loss) + "\n") - - config_str = buildConfigStr(config, layer_desc) - - f.write(config_str) - 
f.write("-----\n") - - it += 1 - - -def dumpBaseLineConfig(conf_id, perf_improv, energy_red, \ - baseline_acc, hardware_target, bench_layer_composition, f_out): - - f_out.write("+++++\n") - f_out.write("conf" + str(conf_id) + " " + str(perf_improv) + " " + str(energy_red) + " " + \ - str(baseline_acc) + " " + str(0) + "\n") - - config_str = genFP32Config(bench_layer_composition, hardware_target) - - f_out.write(config_str) - f_out.write("-----\n") - - - -def genFP32Config(layer_comp, hardware_target): - - it = 1 - config_str = "" - for layer in layer_comp: - config_str += str(it) + " " - config_str += hardware_target + " " - - for op in layer: - op_name = getOpMapping(op) - config_str += str(op_name) + " fp32 1 " - - config_str += "\n" - - it += 1 - - config_str += str(it) + " " + hardware_target + " softmax fp32 1\n" - - return config_str - - - -# ***** Exported Interface --- Generates file used by HPVM RT controller ******/ -def dumpDevConfigsToRTFile(configurations, config_out_path, \ - bench_layer_composition, baseline_acc, hardware_target): - - f = open(config_out_path, "w+") - - dumpBaseLineConfig(1, 1.0, 0, baseline_acc, hardware_target, bench_layer_composition, f) - - it = 2 - for config in configurations: - f.write("+++++\n") - f.write("conf" + str(it) + " " + str(config.speedup) + " 0 " + \ - str(config.avg_accuracy) + " " + str(config.avg_loss) + "\n") - - config_str = buildConfigStr(config, bench_layer_composition, hardware_target) - - f.write(config_str) - f.write("-----\n") - - it += 1 - - - -def prependBaseline(Bench): - - f1 = open(Bench.base_dir + "/tuner_confs_base.txt", "r") - baseline_str = f1.read() - f1.close() - - f2 = open(Bench.base_dir + "/tuner_pareto_confs_" + batch_id + ".txt", "r") - config_str = f2.read() - f2.close() - - f3 = open(Bench.base_dir + "/tuner_pareto_confs_" + batch_id + ".txt", "w+") - f3.write(baseline_str) - f3.write(config_str) - f3.close() - - - -def generateConf(Bench): - - layer_desc = 
loadLayerDesc(Bench.layer_file) - - utils.debug_print ("layer_desc = ", layer_desc) - - #config_arr1 = loadConfigData(Bench.result_dir_1, Bench.promise_accuracy) - #config_arr2 = loadConfigData(Bench.result_dir_2, Bench.promise_accuracy) - #config_arr3 = loadConfigData(Bench.result_dir_3, Bench.promise_accuracy) - - result_dir1 = Bench.result_dir_1 + "/algo_tuner/pareto/" - result_dir2 = Bench.result_dir_2 + "/algo_tuner/pareto/" - result_dir3 = Bench.result_dir_3 + "/algo_tuner/pareto/" - - config_arr1 = loadConfigsFromDir(result_dir1, Bench.promise_accuracy) - config_arr2 = loadConfigsFromDir(result_dir2, Bench.promise_accuracy) - config_arr3 = loadConfigsFromDir(result_dir3, Bench.promise_accuracy) - - config_arrs = config_arr1 + config_arr2 + config_arr3 - - dumpConfig(layer_desc, config_arrs, Bench.base_dir) - - prependBaseline(Bench) - - - - -def dumpBaselineConfs(Bench): - - layer_desc = loadLayerDesc(Bench.layer_file) - - #-- print("layer_desc = ", layer_desc) - - f = open(Bench.base_dir + "/tuner_confs_base.txt", "w+") - - f.write("+++++\n") - f.write("conf" + str(1) + " " + str(1) + " 0 " + str(Bench.promise_accuracy) + " " + str(0) + "\n") - - config = Config() - flags = [] - for i in range(Bench.num_layers): - flags.append(11) - - config.flags = flags - config_str = buildConfigStr(config, layer_desc) - - f.write(config_str) - f.write("-----\n") - - - f.close() - - - #f.write("+++++\n") - #f.write("conf" + str(2) + " " + str(1.5) + " 0 " + str(Bench.promise_accuracy) + " " + str(0) + "\n") - - #config = Config() - #flags = [] - #for i in range(Bench.num_layers): - # flags.append(12) - - #config.flags = flags - #config_str = buildConfigStr(config, layer_desc) - - #f.write(config_str) - #f.write("-----\n") - - - - - - -if __name__ == "__main__": - - Bench = bench_tuner_data["alexnet_cifar10"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = 
bench_tuner_data["vgg16_cifar10"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["lenet_keras"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - generateConf(Bench) - dumpBaselineConfs(Bench) - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compareResults.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compareResults.py deleted file mode 100644 index 6ee7466242d47299d5aa7622f15aef7d35832a2a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compareResults.py +++ /dev/null @@ -1,66 +0,0 @@ - - - -import os -from benchmarks import bench_tuner_data -from buildRtConfig import loadConfigData -from buildRtConfig import loadConfigsFromDir - - - -def compareBench(batch_ids, Bench): - - losses = ["1", "2", "3"] - for loss in losses: - print ("\n Loss = ", loss, " % \n") - for id in batch_ids: - result_dir = Bench.base_dir + "/loss_" + loss + "/batch" + id - #config_arr = loadConfigData(result_dir, Bench.promise_accuracy, "high_confidence") - - #result_dir += "/algo_tuner/high_confidence/" - result_dir += "/promise_tuner3/high_confidence/" - - config_arr = loadConfigsFromDir(result_dir, Bench.promise_accuracy) - - count = len(config_arr) - if len(config_arr) > 0: - max_speedup = max(config.speedup for config in config_arr) - else: - max_speedup = 1.0 - print ("Bench = ", Bench.promise_binary, " BatchID = ", id, " Loss = ", loss, " Count = ", count, " MaxS = ", max_speedup) - - - - -if __name__ == "__main__": - - - batch_ids = [] - - #batch_ids.append("13") # No Error Sens - baseline - #batch_ids.append("14") # 
Ops Skipped 10% for Loss1, 25% Loss2, 40% Loss3 - #batch_ids.append("15") # 3 differnet levels for each of Loss1, Loss2, Loss3 - #batch_ids.append("19") # Baseline + Pareto - #batch_ids.append("20") # Batch18 + Pareto - - #batch_ids.append("101") # Algo-specific tuning - - #batch_ids.append("201") # Algo-specific tuning - - #---- batch_ids.append("202") # Algo-specific tuning - #batch_ids.append("212") # Algo-specific tuning - #batch_ids.append("211") # Algo-specific tuning - - - batch_ids.append("220") # Algo-specific tuning - - - compareBench(batch_ids, bench_tuner_data["lenet_keras"]) - compareBench(batch_ids, bench_tuner_data["alexnet_cifar10"]) - compareBench(batch_ids, bench_tuner_data["mobilenet_cifar10"]) - - compareBench(batch_ids, bench_tuner_data["alexnet2_cifar10"]) - compareBench(batch_ids, bench_tuner_data["vgg16_cifar10"]) - compareBench(batch_ids, bench_tuner_data["vgg16_cifar100"]) - compareBench(batch_ids, bench_tuner_data["resnet18_cifar10"]) - compareBench(batch_ids, bench_tuner_data["mobilenet_shallow"]) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compute_confs.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compute_confs.py deleted file mode 100644 index f82c09095ceac24d8ee4a765f1d63be987b625a9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/compute_confs.py +++ /dev/null @@ -1,56 +0,0 @@ - - -from swing_selection import compute_swing_selection -from swing_selection2 import compute_swing_selection2 - - -def computeBenchSwings(Bench): - - dir_prefix = "../build_tuner/" - - loss_confs = [] - conf_ranks = [] - # Swing selection for 1% and 2% results - #Bench = bench_tuner_data[bench_name] - tuned_result_dir = dir_prefix + Bench.result_dir_1 + "/high_confidence/" - layer_file = Bench.layer_file - layer_swings, file_names = compute_swing_selection(tuned_result_dir, layer_file) - loss_confs.append(layer_swings) - conf_ranks.append(file_names) - print 
(file_names) - - tuned_result_dir = dir_prefix + Bench.result_dir_2 + "/high_confidence/" - layer_swings, file_names = compute_swing_selection(tuned_result_dir, layer_file) - loss_confs.append(layer_swings) - conf_ranks.append(file_names) - print (file_names) - - - return loss_confs, conf_ranks - - - - - -def computePSNRBenchSwings(Bench): - - loss_confs = [] - conf_ranks = [] - # Swing selection for 1% and 2% results - #Bench = bench_tuner_data[bench_name] - tuned_result_dir = Bench.result_dir_1 + "/high_confidence/" - layer_file = Bench.layer_file - layer_swings, file_names = compute_swing_selection2(tuned_result_dir, layer_file) - loss_confs.append(layer_swings) - conf_ranks.append(file_names) - print (file_names) - - tuned_result_dir = Bench.result_dir_2 + "/high_confidence/" - layer_swings, file_names = compute_swing_selection2(tuned_result_dir, layer_file) - loss_confs.append(layer_swings) - conf_ranks.append(file_names) - print (file_names) - - - return loss_confs, conf_ranks - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/error_sensitivity.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/error_sensitivity.py deleted file mode 100644 index 186477164240694ebae63f019b7824dc1e12c83b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/error_sensitivity.py +++ /dev/null @@ -1,378 +0,0 @@ - - -import subprocess -import os -import operator -from benchmarks import bench_tuner_data -from swing_selection import loadLayerDesc -import math - - -def constructTunerFile(num_flags, tensor_id, error_level, default_error): - - f = open("opentuner_flags", "w+") - - for i in range(num_flags): - if i == tensor_id: - f.write(str(error_level) + "\n") - else: - f.write(str(default_error) + "\n") - - f.close() - - - -def runAndTestError(binary_name, gold_acc): - - num_runs = 10 - - binary_name = "./" + binary_name - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([binary_name, 
str(num_runs)], stdout = FNULL) - p.wait() - - f = open("run_accuracies.txt") - - total_err = 0.0 - for x in f: - acc = float(x.strip()) - total_err += (gold_acc - acc) - - avg_err = total_err / num_runs - - return avg_err - - - -def roundDecimal(val): - - new_val = int(val * 10000) - new_val = float(new_val) / 10000 - - return new_val - - - - -def test_sensitivity(Bench): - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - num_flags = Bench.num_flags - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 0) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - #print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_errors_multiple.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_errors_ranked_1000.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() - - - -def test_sensitivity2(Bench): - - num_flags = Bench.num_flags - - constructTunerFile(num_flags, 0, 6, 6) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - - ref_acc = Bench.tuner_accuracy - error - print ("*** Gold accuracy = ", Bench.tuner_accuracy, " Ref accuracy = ", ref_acc, " *** \n\n") - - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 6) - error = 
runAndTestError(Bench.tuner_binary, ref_acc) - print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_composite_errors.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_composite_errors_ranked.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() - - - -def test_sensitivity3(Bench): - - tensor_errors = [] - - error_levels = [2, 5, 8, 11, 14, 17] - num_flags = Bench.num_flags - - for tensor_id in range(num_flags): - total_error = 0 - errors = [] - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 0) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - print (tensor_id, error_level, error) - errors.append(error) - - tensor_errors.append([tensor_id, errors]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_errors_multiple.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i)) - for j in range(len(tensor_errors[i][1])): - val = roundDecimal(tensor_errors[i][1][j]) - f.write("\t" + str(val) ) - f.write("\n") - - f.close() - - - - - -def test_sensitivity4(Bench): - - num_flags = Bench.num_flags - - constructTunerFile(num_flags, 0, 5, 5) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - - ref_acc = Bench.tuner_accuracy - error - print ("*** Gold accuracy = ", Bench.tuner_accuracy, " Ref accuracy = ", ref_acc, " *** \n\n") - - - tensor_errors = [] - 
error_levels = [4, 8, 11, 14, 16, 19] - - for tensor_id in range(num_flags): - errors = [] - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 5) - error = runAndTestError(Bench.tuner_binary, ref_acc) - print (tensor_id, error_level, error) - errors.append(error) - - tensor_errors.append([tensor_id, errors]) - - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/composite_errors.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i)) - for j in range(len(tensor_errors[i][1])): - val = roundDecimal(tensor_errors[i][1][j]) - f.write("\t" + str(val) ) - f.write("\n") - - f.close() - - - - - -def readTensorErrs(result_dir): - - tensor_errs = [] - f = open(result_dir + "/tensor_errors.txt") - - for x in f: - err = float(x.split()[1]) - tensor_errs.append(err) - - return tensor_errs - - - -def readTensorErrs2(result_dir): - - tensor_errs = [] - f = open(result_dir + "/tensor_errors_multiple.txt") - - for x in f: - toks = x.split() - total_err = 0.0 - for tok in toks[2:-1]: - err = float(tok) - total_err += err - - avg_err = total_err / len(toks[2:-1]) - tensor_errs.append(avg_err) - - return tensor_errs - - -def isSkipLayer(layer): - - if "dense" in layer or "conv" in layer: - return False - else: - return True - - -def readLayerCosts(cost_file): - - f = open(cost_file) - layer_costs = [] - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - return layer_costs - - - -disable_skipping = False - -def select_skip_layers(Bench, percent_to_skip): - - if disable_skipping: - return "0" - - result_dir = Bench.base_dir - layer_file = Bench.layer_file - - tensor_errs = readTensorErrs2(result_dir) - layer_costs = readLayerCosts(Bench.cost_file) - layer_desc = loadLayerDesc(layer_file) - - it = 0 - index = 0 - layer_errs = [] - for layer in layer_desc: - layer_len = len(layer) - avg_err = tensor_errs[index] - index += layer_len - - if 
isSkipLayer(layer): - continue - - cost = (math.sqrt(layer_costs[it])) / 100; - ERR_IMPACT = avg_err / cost - #print ("layer, ", it, " avg_err = ", avg_err, " cost = ", cost, " err_impact = ", ERR_IMPACT) - - layer_errs.append((ERR_IMPACT, it)) - it += 1 - - layer_errs.sort(key=operator.itemgetter(0), reverse=True) - - to_skip = len(layer_errs) - to_skip = math.ceil((percent_to_skip / 100.0) * to_skip) - - skip_str = "" - it = 0 - for err in layer_errs: - if it >= to_skip: - break - - skip_str += str(err[1]) - if it < to_skip - 1: - skip_str += "_" - - it += 1 - - return skip_str - - - - - - -if __name__ == "__main__": - - - AlexNet = bench_tuner_data["alexnet_cifar10"] - skip_str = select_skip_layers(AlexNet, 10) - print ("AlexNet skip_str = ", skip_str) - - - AlexNet2 = bench_tuner_data["alexnet2_cifar10"] - skip_str = select_skip_layers(AlexNet2, 15) - print ("AlexNet2 skip_str = ", skip_str) - - - VGG16 = bench_tuner_data["vgg16_cifar10"] - skip_str = select_skip_layers(VGG16, 15) - print ("VGG16 skip_str = ", skip_str) - - - VGG16_100 = bench_tuner_data["vgg16_cifar100"] - skip_str = select_skip_layers(VGG16_100, 15) - print ("VGG16_100 skip_str = ", skip_str) - - - ResNet = bench_tuner_data["resnet18_cifar10"] - skip_str = select_skip_layers(ResNet, 10) - print ("ResNet skip_str = ", skip_str) - - - MobileNet = bench_tuner_data["mobilenet_cifar10"] - skip_str = select_skip_layers(MobileNet, 15) - print ("MobileNet skip_str = ", skip_str) - - - MobileNet_SH = bench_tuner_data["mobilenet_shallow"] - skip_str = select_skip_layers(MobileNet_SH, 15) - print ("MobileNet_SH skip_str = ", skip_str) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/genPlots.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/genPlots.py deleted file mode 100644 index df05ddc52be66a0073a76093b77f1de328706635..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/genPlots.py +++ /dev/null @@ -1,41 +0,0 @@ - - 
-import matplotlib.pyplot as plt -import seaborn -import numpy as np - - - - - -def genScatterPlot(accuracy_losses, speedups, file_path): - - plt.scatter(accuracy_losses, speedups) - plt.xlabel("accuracy_loss") - plt.ylabel("speedup") - plt.savefig(file_path) - plt.close() - - - -def genScatterPlotFromConfigs(configurations, file_path): - - accuracy_losses = [] - speedups = [] - - for conf in configurations: - accuracy_losses.append(conf.avg_loss) - speedups.append(conf.speedup) - - genScatterPlot(accuracy_losses, speedups, file_path) - - - -if __name__ == "__main__": - - x = np.array([1, 2, 3]) - y = np.array([1, 2, 3]) - - print ("type = ", type(plt.scatter(x, y))) - - plt.savefig("output.png") diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/global_paths.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/global_paths.py deleted file mode 100644 index d93e96ef7cdca95239c625a6018fb4b2adb1ba45..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/global_paths.py +++ /dev/null @@ -1,12 +0,0 @@ - -import os -import sys - - - -if "LLVM_SRC_ROOT" not in os.environ: - print ("ERROR: LLVM_SRC_ROOT not set --- set $LLVM_SRC_ROOT to top of LLVM source tree ") - sys.exit(-1) - -opentuner_src_dir = os.environ["LLVM_SRC_ROOT"] + "/projects/hpvm-tensor-rt/autotuner/opentuner/autotuner/" -tensorRT_dir = os.environ["LLVM_SRC_ROOT"] + "/projects/hpvm-tensor-rt/" diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/knob_pruning.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/knob_pruning.py deleted file mode 100644 index dfcab4f36bf425615debad880a0e2a828867d7ba..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/knob_pruning.py +++ /dev/null @@ -1,86 +0,0 @@ - - -import utils -import subprocess - - -def createPromiseFile(l_knob, layer_ind, num_layers): - - f = open("promise_flags", "w+") - - for i in range(num_layers): - 
if i == layer_ind: - f.write(str(l_knob) + "\n") - else: - f.write("11\n") - - f.close() - - - -def runBinary(binary_path): - - run_cmd = "./" + binary_path - print (run_cmd) - - p = subprocess.Popen(run_cmd, shell=True) - p.wait() - - return utils.readAccuracy("final_accuracy") - - - - - -def getPrunedKnobs(binary_path, layer_file, global_knobs_file, \ - baseline_acc, acc_slack): - - - knobs = utils.getInstallAndDevKnobs(layer_file, \ - global_knobs_file) - - pruned_knobs = [] - num_layers = len(knobs) - layer_ind = 0 - for layer_knobs in knobs: - pruned_layer_knobs = [] - for l_knob in layer_knobs: - createPromiseFile(l_knob, layer_ind, num_layers) - accuracy = runBinary(binary_path) - acc_loss = baseline_acc - accuracy - if acc_loss <= acc_slack: - pruned_layer_knobs.append(l_knob) - print ("\n + l_knob = ", l_knob, " - layer_ind = ", layer_ind) - print ("- acc_loss = ", acc_loss, " **** SELECTED *** ") - else: - print ("\n -- l_knob = ", l_knob, " - layer_ind = ", layer_ind) - print ("- acc_loss = ", acc_loss, " --- REJECTED --- ") - - pruned_knobs.append(pruned_layer_knobs) - - layer_ind += 1 - - - print ("*** knobs = ", knobs) - - return pruned_knobs - - - -if __name__ == "__main__": - - - #pruned_knobs = getPrunedKnobs("alexnet2_promise", "../autotuner/data/alexnet2/alexnet2_layers.txt", \ - # "../autotuner/data/global_knobs.txt", 84.5, 3) - - pruned_knobs = getPrunedKnobs("lenet_promise", "../autotuner/data/lenet/lenet_layers.txt", \ - "../autotuner/data/global_knobs.txt", 99.7, 1) - - - - print ("*** pruned_knobs = ", pruned_knobs) - - - utils.dumpKnobsFile(pruned_knobs, "install_knobs.txt") - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/main_driver.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/main_driver.py deleted file mode 100644 index c4a5e0fac038fbddee0025a3e0b75b8005d3be3e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/main_driver.py +++ /dev/null @@ 
-1,69 +0,0 @@ - -import os -import sys -import subprocess -import shutil - - -from benchmarks import bench_tuner_data, batch_id -from utils import createResultDirs -from run_devtime_tuner import DevTimeTuner -from run_install_tuner import InstallTimeTuner - - - - -# Invoke Dev-time Autotuner -def runDevTimeBenchs(): - - Bench = bench_tuner_data["mobilenet_cifar10"] - benchTuner = DevTimeTuner(Bench) - benchTuner.runDevTuner() - - Bench = bench_tuner_data["resnet18_cifar10"] - benchTuner = DevTimeTuner(Bench) - benchTuner.runDevTuner() - - Bench = bench_tuner_data["alexnet_cifar10"] - benchTuner = DevTimeTuner(Bench) - benchTuner.runDevTuner() - - - #for bench_id in bench_tuner_data: - # Bench = bench_tuner_data[bench_id] - # benchTuner = DevTimeTuner(Bench) - # benchTuner.runDevTuner() - - - - - - -# Invoke Dev-time Autotuner -def runInstallTimeBenchs(): - - Bench = bench_tuner_data["alexnet_cifar10"] - benchTuner = InstallTimeTuner(Bench) - benchTuner.runDevTuner() - - - Bench = bench_tuner_data["alexnet2_cifar10"] - benchTuner = InstallTimeTuner(Bench) - benchTuner.runDevTuner() - - - - - - - -if __name__ == "__main__": - - createResultDirs(bench_tuner_data) - - #runDevTimeBenchs() - - runInstallTimeBenchs() - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_curve.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_curve.py deleted file mode 100644 index d90403be23fae547fde9e2ac4996f5cca3b0e5fb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_curve.py +++ /dev/null @@ -1,313 +0,0 @@ - - -from buildRtConfig import loadConfigData -from benchmarks import bench_tuner_data -import os -import shutil - - -AL_THRESHOLD = 0.1 -SPEEDUP_BAND_SIZE = 0.1 -ENERGY_BAND_SIZE = 10 - -class Configuration: - def __init__(self, name, speedup, energy, accuracy, accuracy_loss, flags): - self.name = name - self.speedup = speedup - self.energy = energy - self.accuracy = accuracy 
- self.accuracy_loss = accuracy_loss - self.flags = flags - def __repr__(self): - return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss)) - -configuration_objects = [ - Configuration('conf1', 1.05, 15, 85, 1.2, []), - Configuration('conf2', 2.51, 12, 83, 1.4, []), - Configuration('conf3', 2.05, 10, 84, 0.8, []), -] - -def compute_pareto_points(configurations): - speedupconfigurations = [] - energyconfigurations = [] - #sort configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy > en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - # outer while loop variable increment - start_idx = end_idx - return [speedupconfigurations, energyconfigurations] - - -def compute_pareto_points_with_margin(configurations, speedup_band_width, energy_band_width): - speedupconfigurations = [] - energyconfigurations = [] - #sort 
configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) - - idx_to_sp_conf_dict = {} - idx_to_en_conf_dict = {} - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy < en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - idx_to_sp_conf_dict[start_idx] = len(speedupconfigurations)-1 - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - idx_to_en_conf_dict[start_idx] = len(energyconfigurations)-1 - # outer while loop variable increment - start_idx = end_idx - - # We want to add configurations in a band of a certain width around the curves - # not possible to do during contruction, because the quality of the curve would - # deteriorate quickly - - AdjustedSpeedupCurve = [] - AdjustedEnergyCurve = [] - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < 
AL_THRESHOLD) : - end_idx += 1 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup + speedup_band_width >= speedupconfigurations[idx_to_sp_conf_dict[start_idx]].speedup: - AdjustedSpeedupCurve.append(sorted_configurations[i]) - if sorted_configurations[i].energy + energy_band_width >= energyconfigurations[idx_to_en_conf_dict[start_idx]].energy: - AdjustedEnergyCurve.append(sorted_configurations[i]) - # outer while loop variable increment - start_idx = end_idx - - return [AdjustedSpeedupCurve, AdjustedEnergyCurve] - - - -def findParetoConfigs(base_dir, accuracy): - - result_dir = base_dir + "/algo_tuner/pareto/" - try: - os.mkdir(result_dir) - except: - print ("could not create dir") - - input_dir = base_dir - config_arr = loadConfigData(input_dir, accuracy, "high_confidence") - - config_list = [] - it = 0 - for config in config_arr: - config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss, config.flags) - config_list.append(config) - - - if (len(config_list) > 0): - max_speedup = max(config.speedup for config in config_list) - else: - max_speedup = 1.5 - - SPEEDUP_BAND_SIZE = 0.05 # max_speedup * 1.0 / 12 # 4 # 20% of the max speedup - ENERGY_BAND_SIZE = 10 - - print ("max_speedup = ", max_speedup, " BAND_SIZE = ", SPEEDUP_BAND_SIZE) - - - print ("*SPEEDUP_BAND_SIZE = ", SPEEDUP_BAND_SIZE) - - ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - - #print (ASC) - #print (config_list) - - if len(ASC) < 5: - ASC = config_list - - - if len(ASC) > 50: - ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE/4, ENERGY_BAND_SIZE) - - - print ("len(config_list) = ", len(config_list)) - print ("len(ASC) = ", len(ASC)) - - for conf in ASC: - src_path = base_dir + "/algo_tuner/high_confidence/" + conf.name - dst_path = base_dir + "/algo_tuner/pareto/" + conf.name - shutil.copy(src_path, dst_path) - - return ASC - - - -def flagsPerLayer(ASC, 
num_layers): - - layer_flags = [] - for i in range(num_layers): - layer_map = {} - layer_flags.append(layer_map) - - - for config in ASC: - config_flags = config.flags - for i in range(len(config_flags)): - layer_flags[i][config_flags[i]] = 1 - - - print (layer_flags) - - return layer_flags - - - - - - -def dumpBenchPareto(Bench): - - result_dir1 = Bench.result_dir_1 - result_dir2 = Bench.result_dir_2 - result_dir3 = Bench.result_dir_3 - - acc_thresh = Bench.promise_accuracy - - ASC1 = findParetoConfigs(result_dir1, acc_thresh) - ASC2 = findParetoConfigs(result_dir2, acc_thresh) - ASC3 = findParetoConfigs(result_dir3, acc_thresh) - - - flags1 = flagsPerLayer(ASC1, Bench.num_layers) - flags2 = flagsPerLayer(ASC2, Bench.num_layers) - flags3 = flagsPerLayer(ASC3, Bench.num_layers) - - return flags1, flags2, flags3 - - - - -if __name__ == "__main__": - - Bench = bench_tuner_data["alexnet_cifar10"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["vgg16_cifar10"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["lenet_keras"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - dumpBenchPareto(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - dumpBenchPareto(Bench) - - - #get_pareto_configs("") - - #SC, EC = compute_pareto_points(configuration_objects) - #ASC, AEC = compute_pareto_points_with_margin(configuration_objects, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - #print(SC) - #print(EC) - - #print(ASC) - #print(AEC) - - - - - - - #result_dir = base_dir + "/pareto/" - #try: - # os.mkdir(result_dir) - #except: - # print "could not create dir" - - #input_dir = base_dir + "/full_results/" - #result_dir = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15" - #config_arr = loadConfigData(input_dir, accuracy) 
- - #config_list = [] - - #it = 0 - #for config in config_arr: - # config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss) - # config_list.append(config) - - - #ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - #for conf in ASC: - # dst_path = conf.name.replace("full_results", "pareto") - # shutil.copy(conf.name, dst_path) - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_utils.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_utils.py deleted file mode 100644 index ae85160e8f36986c3a58e6033b0684a4338256e2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/pareto_utils.py +++ /dev/null @@ -1,257 +0,0 @@ - - -from buildRtConfig import loadConfigsFromDir -import os -import shutil - - -AL_THRESHOLD = 0.1 -SPEEDUP_BAND_SIZE = 0.1 -ENERGY_BAND_SIZE = 10 - - -class Configuration: - - def __init__(self, name, speedup, energy, accuracy, accuracy_loss, flags): - self.name = name - self.speedup = speedup - self.energy = energy - self.accuracy = accuracy - self.avg_accuracy = accuracy - self.accuracy_loss = accuracy_loss - self.avg_loss = accuracy_loss - self.flags = flags - - def __repr__(self): - return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss)) - - @staticmethod - def speedup_points(configurations): - - return [ - (conf.speedup, conf.accuracy) - for conf in configurations - ] - - -configuration_objects = [ - Configuration('conf1', 1.05, 15, 85, 1.2, []), - Configuration('conf2', 2.51, 12, 83, 1.4, []), - Configuration('conf3', 2.05, 10, 84, 0.8, []), -] - - - -def is_pareto_efficient(configs, values, value_margins): - import numpy as np - from pprint import pprint - - np_values = np.array(values) - np_margins = np.array(value_margins) - is_efficient = np.ones(np_values.shape[0], dtype=bool) - - for i, c in enumerate(np_values): - if is_efficient[i]: 
- # Keep any point with a higher value - is_efficient[is_efficient] = np.any(np_values[is_efficient] + np_margins >= c, axis=1) - is_efficient[i] = True # And keep self - - return (np.array(configs)[is_efficient]).tolist() - - - - - -def compute_pareto_points(configurations): - speedupconfigurations = [] - energyconfigurations = [] - #sort configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and \ - (sorted_configurations[end_idx].accuracy_loss - \ - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy > en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - # outer while loop variable increment - start_idx = end_idx - return [speedupconfigurations, energyconfigurations] - - - - -def compute_pareto_points_with_margin(configurations, speedup_band_width, energy_band_width): - - speedupconfigurations = [] - energyconfigurations = [] - #sort configurations based on speedup - sorted_configurations = sorted(configurations, key=lambda conf: 
conf.accuracy_loss) - - idx_to_sp_conf_dict = {} - idx_to_en_conf_dict = {} - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and \ - (sorted_configurations[end_idx].accuracy_loss - \ - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - - # find best speedup end energy in this accuracy loss level - sp = -1.0 - sp_idx = 0 - en = -1.0 - en_idx = 0 - for i in range(start_idx, end_idx): - if sorted_configurations[i].speedup > sp: - sp = sorted_configurations[i].speedup - sp_idx = i - if sorted_configurations[i].energy < en: - en = sorted_configurations[i].energy - en_idx = i - sp_not_dominated = True - # if not empty list of configurations - if speedupconfigurations: - if speedupconfigurations[-1].speedup >= sp: - sp_not_dominated = False - en_not_dominated = True - # if not empty list of configurations - if energyconfigurations: - if energyconfigurations[-1].energy >= en: - en_not_dominated = False - if sp_not_dominated: - speedupconfigurations.append(sorted_configurations[sp_idx]) - idx_to_sp_conf_dict[start_idx] = len(speedupconfigurations)-1 - if en_not_dominated: - energyconfigurations.append(sorted_configurations[en_idx]) - idx_to_en_conf_dict[start_idx] = len(energyconfigurations)-1 - # outer while loop variable increment - start_idx = end_idx - - # We want to add configurations in a band of a certain width around the curves - # not possible to do during contruction, because the quality of the curve would - # deteriorate quickly - - AdjustedSpeedupCurve = [] - AdjustedEnergyCurve = [] - - start_idx = 0 - while start_idx < len(sorted_configurations): - end_idx = start_idx + 1; - # find end_idx - while end_idx < len(sorted_configurations) and \ - (sorted_configurations[end_idx].accuracy_loss - \ - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : - end_idx += 1 - - for i in range(start_idx, end_idx): - if 
sorted_configurations[i].speedup + speedup_band_width >= \ - speedupconfigurations[idx_to_sp_conf_dict[start_idx]].speedup: - AdjustedSpeedupCurve.append(sorted_configurations[i]) - if sorted_configurations[i].energy + energy_band_width >= \ - energyconfigurations[idx_to_en_conf_dict[start_idx]].energy: - AdjustedEnergyCurve.append(sorted_configurations[i]) - # outer while loop variable increment - start_idx = end_idx - - return [AdjustedSpeedupCurve, AdjustedEnergyCurve] - - - - -#***** Exported Routine *******/ -def dumpParetoConfigsToDir(input_dir, output_dir, gold_accuracy, enable_band): - - config_arr = loadConfigsFromDir(input_dir, gold_accuracy) - config_list = [] - it = 0 - - for config in config_arr: - config = Configuration(config.fname , config.speedup, 100, \ - config.avg_accuracy, config.avg_loss, config.flags) - - config_list.append(config) - - - if (len(config_list) > 0): - max_speedup = max(config.speedup for config in config_list) - else: - max_speedup = 1.0 # No Speedup since no configuration found - - - #SPEEDUP_BAND_SIZE = 0.05 # max_speedup * 1.0 / 12 # 4 # 20% of the max speedup - - if enable_band: - SPEEDUP_BAND_SIZE = max_speedup * 1.0 / 10 # 10% of the max speedup is the 'BAND SIZE' - ENERGY_BAND_SIZE = 0 # Unused right now - - ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) - - else: - SPEEDUP_BAND_SIZE = 0 # no pareto band - true pareto curve - ENERGY_BAND_SIZE = 0 # Unused right now - - #ASC, AEC = compute_pareto_points(config_list) - - speedup_points = Configuration.speedup_points(config_list) - ASC = is_pareto_efficient(config_list, speedup_points, [-0.001, -0.001]) # [0.05, 0.05]) - - - - print ("*max_speedup = ", max_speedup) - print ("*SPEEDUP_BAND_SIZE = ", SPEEDUP_BAND_SIZE) - - - # Prevents very small pareto-curves - #if len(ASC) < 10 or len(config_list) < 20: - - #if len(config_list) < 10: - # ASC = config_list - - - print ("len(config_list) = ", len(config_list)) - print ("len(ASC) 
= ", len(ASC)) - - for conf in ASC: - src_path = input_dir + '/' + conf.name - dst_path = output_dir + '/' + conf.name - shutil.copy(src_path, dst_path) - - return ASC - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/profiling.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/profiling.py deleted file mode 100644 index 3ed37822a6fa654c16f5c8ce3b41dc8287931b87..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/profiling.py +++ /dev/null @@ -1,26 +0,0 @@ - -import time - -profiled_ops = {} - -def startProfile(op_id): - start = time.time() - return start - - -def stopProfile(op_id, start): - end = time.time() - total_time = end - start - - profiled_ops[op_id] = total_time - - -def dumpProfiles(file_name): - - f = open(file_name, "w+") - for op_id in profiled_ops: - f.write(op_id + " : " + str(profiled_ops[op_id]) + "\n") - - f.close() - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/remap.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/remap.py deleted file mode 100644 index 8dc69357526b711d563d454f0ce41219dbfe579c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/remap.py +++ /dev/null @@ -1,291 +0,0 @@ - -import sys -import os -import shutil -from validation import invokeBinary -from buildRtConfig import loadConfigData, loadPromiseConfigs -from benchmarks import bench_tuner_data, batch_id -from swing_selection import convL1bins, convL2bins - - - -def readKnobConfig(file_path): - - knobs_speedup = {} - f = open(file_path, "r") - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - - print ("knobs_speedup = ", knobs_speedup) - - return knobs_speedup - - - -def getPromiseSwing(l1, l2, flag): - - if l1 < 0.1 or l2 < 0.1: - return flag - - swing = 1 - for i in range(len(convL1bins)): - l1_t = convL1bins[i][0] - 
l2_t = convL2bins[i][0] - - if l1 > l1_t and l2 > l2_t: - break - swing += 1 - - return swing - - - -def replaceWithPromise(layer_flags, norms_file): - - num_layers = len(layer_flags) - - f = open(norms_file, "r") - it = 0 - for x in f: - op_name = x.split()[0] - print ("op_name = ", op_name) - if op_name == "tensorMul": - break; - - l1 = float(x.split()[5]) - l2 = float(x.split()[6]) - - if it > 0: - flag = getPromiseSwing(l1, l2, layer_flags[it]) - layer_flags[it] = flag - - #print ("l1 = ", l1, " l2 = ", l2) - it += 1 - - if it == num_layers: - break - - print (layer_flags) - return layer_flags - - - - -def readCostFile(file_path): - - layer_costs = [] - f = open(file_path) - for x in f: - cost = float(x.strip()) - layer_costs.append(cost) - - print ("len(layer_costs) = ", layer_costs) - f.close() - - return layer_costs - - - -def getSpeedup(flags, knobs_speedup, layer_costs): - - orig_cost = 0.0 - total_cost = 0.0 - it = 0 - for flag_value in flags: - op_cost = layer_costs[it] - speedup = knobs_speedup[flag_value] - - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - it += 1 - - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - - return speedup - - - -def dumpNewFlags(new_flags, orig_file, promise_flags_file, layer_costs, knobs_speedup): - - speedup = getSpeedup(new_flags, knobs_speedup, layer_costs) - - top_line = "" - for x in open(orig_file, "r"): - top_line = x - break - - f = open(promise_flags_file, "w+") - f.write(top_line.replace("\n", "")) - f.write("\tnew_speedup=" + str(speedup) + "\n") - - - for flag in new_flags: - f.write(str(flag) + "\n") - - f.close() - - - - -def remapLossConfig(configs_arr, result_dir, sub_dir, layer_costs, knobs_speedup): - - - for conf in configs_arr: - layer_flags = conf.flags - fname = conf.fname - norms_file = result_dir + "/algo_tuner/" + sub_dir + "/" + fname + "_norms" - orig_file = result_dir + "/algo_tuner/" + sub_dir + "/" + fname - new_flags = replaceWithPromise(layer_flags, norms_file) - - 
promise_test_dir = result_dir + "/algo_tuner/promise_test/" - if not os.path.exists(promise_test_dir): - os.mkdir(promise_test_dir) - - promise_flags_file = result_dir + "/algo_tuner/promise_test/" + fname + "_promise" - dumpNewFlags(new_flags, orig_file, promise_flags_file, layer_costs, knobs_speedup) - - - -def remapConfigs(Bench): - - - loss1_dir = Bench.result_dir_1 - loss2_dir = Bench.result_dir_2 - loss3_dir = Bench.result_dir_3 - - loss1_configs = loadConfigData(loss1_dir, 100, "validated") - loss2_configs = loadConfigData(loss2_dir, 100, "validated") - loss3_configs = loadConfigData(loss3_dir, 100, "validated") - - knobs_speedup = readKnobConfig("../opentuner/data/global_knobs.txt") - layer_costs = readCostFile(Bench.cost_file) - - remapLossConfig(loss1_configs, loss1_dir, "validated", layer_costs, knobs_speedup) - remapLossConfig(loss2_configs, loss2_dir, "validated", layer_costs, knobs_speedup) - remapLossConfig(loss3_configs, loss3_dir, "validated", layer_costs, knobs_speedup) - - - - -def validateRemapConfigs(Bench): - - num_layers = Bench.num_layers - base_conf = getBaselineConfig(num_layers) - # Path to binary to run - binary_path = Bench.promise_binary - # NOTE: 'target_acc' passed 0.0 since unused for baseline run - invokeBinary(binary_path, base_conf, 1, 2000, 8000, 0.0) - gold_acc = readAccuracy("final_accuracy") - - - loss1_dir = Bench.result_dir_1 - loss2_dir = Bench.result_dir_2 - loss3_dir = Bench.result_dir_3 - - loss1_configs = loadPromiseConfigs(loss1_dir, 100, "promise_test") - loss2_configs = loadPromiseConfigs(loss2_dir, 100, "promise_test") - loss3_configs = loadPromiseConfigs(loss3_dir, 100, "promise_test") - - runs = 30 - validateAlgoConfigs(binary_path, loss1_dir, loss1_configs, gold_acc, 1.0, runs) - validateAlgoConfigs(binary_path, loss2_dir, loss2_configs, gold_acc, 2.0, runs) - validateAlgoConfigs(binary_path, loss3_dir, loss3_configs, gold_acc, 3.0, runs) - - - - - - - - -def copyNormFile(fname, result_dir, sub_dir): - - 
target_dir = result_dir + "/algo_tuner/" + sub_dir - dest_file = target_dir + "/" + fname + "_norms" - - shutil.copy("accuracy_summary", dest_file) - - - - -def dumpNorms(binary_path, result_dir, configs_arr): - - runs = 1 - for conf in configs_arr: - layer_swings = conf.flags - invokeBinary(binary_path, layer_swings, runs, 3000, 5000, 100) - - #copyNormFile(conf.fname, result_dir, "high_confidence") - copyNormFile(conf.fname, result_dir, "validated") - - - -def computeConfigNorms(Bench): - - loss1_dir = Bench.result_dir_1 - loss2_dir = Bench.result_dir_2 - loss3_dir = Bench.result_dir_3 - - loss1_configs = loadConfigData(loss1_dir, 100, "validated") - loss2_configs = loadConfigData(loss2_dir, 100, "validated") - loss3_configs = loadConfigData(loss3_dir, 100, "validated") - - - binary_path = Bench.promise_binary - - dumpNorms(binary_path, loss1_dir, loss1_configs) - dumpNorms(binary_path, loss2_dir, loss2_configs) - dumpNorms(binary_path, loss3_dir, loss3_configs) - - - -if __name__ == "__main__": - - Bench = bench_tuner_data["alexnet_cifar10"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["vgg16_cifar10"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - computeConfigNorms(Bench) - remapConfigs(Bench) - #validateRemapConfigs(Bench) - - Bench = bench_tuner_data["lenet_keras"] - computeConfigNorms(Bench) - 
remapConfigs(Bench) - #validateRemapConfigs(Bench) - - #computeConfigNorms(Bench) - #remapConfigs(Bench) - - #validateRemapConfigs(Bench) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner.py deleted file mode 100644 index 2df75fbfc4e7568361747f75f06a4b818a8f99be..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner.py +++ /dev/null @@ -1,102 +0,0 @@ - - -import os -import subprocess -from error_sensitivity import select_skip_layers - - -def runAlgoTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_runs): - - tuner_cmd = "python2 ../opentuner/autotuner/algo_tuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-layers " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --result-dir " - tuner_cmd += dir_prefix - tuner_cmd += result_dir + "/algo_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy - acc_threshold) - tuner_cmd += " --cost-file " - tuner_cmd += Bench.cost_file - tuner_cmd += " --knobs-config " - tuner_cmd += "../opentuner/data/global_knobs.txt" - tuner_cmd += " --layer-knobs " - tuner_cmd += Bench.layer_knobs - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - -""" - -def promiseTunerLoss1(Bench, dir_prefix): - - tuner_runs = int(Bench.autotuner_runs / 3) - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 30) - skip_layers3 = "0_" + select_skip_layers(Bench, 50) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers3) - - -def promiseTunerLoss2(Bench, 
dir_prefix): - - tuner_runs = int(Bench.autotuner_runs / 3) - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 20) - skip_layers3 = "0_" + select_skip_layers(Bench, 40) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers3) - - - -def promiseTunerLoss3(Bench, dir_prefix): - - tuner_runs = int (Bench.autotuner_runs / 3) - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 10) - skip_layers3 = "0_" + select_skip_layers(Bench, 30) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers3) - - -""" - - -BASELINE = True - - -def runAlgoBench(Bench): - - # NOTE-IMP: Changing current directory to one with promise binaries - dir_prefix = "../build_tuner/" - - - if BASELINE: - tuner_runs = Bench.autotuner_runs - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs) - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs) - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs) - - else: - promiseTunerLoss1(Bench, dir_prefix) - promiseTunerLoss2(Bench, dir_prefix) - promiseTunerLoss3(Bench, dir_prefix) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner2.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner2.py deleted file mode 100644 index 99867fade3aac75d2fcc4c411e25c2d16595052d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_algo_tuner2.py +++ /dev/null @@ -1,186 +0,0 @@ - - -import os -import numpy as np -import subprocess -from 
error_sensitivity import select_skip_layers -from pareto_curve import dumpBenchPareto -from remap import readCostFile - - -def runAlgoTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_runs): - - fixed_runs = 100 - - tuner_cmd = "python2 ../opentuner/autotuner/algo_tuner2.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(fixed_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-layers " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --result-dir " - tuner_cmd += dir_prefix - tuner_cmd += result_dir + "/promise_tuner3/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy - acc_threshold) - tuner_cmd += " --cost-file " - tuner_cmd += Bench.cost_file - tuner_cmd += " --layer-file " - tuner_cmd += Bench.layer_file - tuner_cmd += " --knobs-config " - tuner_cmd += "../opentuner/data/global_knobs.txt" - tuner_cmd += " --layer-knobs " - tuner_cmd += " local_knobs.txt " - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - -def is50Knob(flag): - - flags50 = {} - flags50[21] = 1 - flags50[22] = 1 - flags50[26] = 1 - flags50[27] = 1 - flags50[31] = 1 - flags50[32] = 1 - - if flag in flags50: - return True - else: - return False - - - -def is25Knob(flag): - - flags25 = {} - flags25[23] = 1 - flags25[24] = 1 - flags25[25] = 1 - flags25[28] = 1 - flags25[29] = 1 - flags25[30] = 1 - flags25[33] = 1 - flags25[34] = 1 - flags25[35] = 1 - flags25[36] = 1 - - if flag in flags25: - return True - else: - return False - - - -def addPromiseFlags(flag_map): - - flags = [] - - has_50_flag = False - has_25_flag = False - - for flag in flag_map: - if is50Knob(flag): - has_50_flag = True - if is25Knob(flag): - has_25_flag = True - - - if has_50_flag: - flag_map[7] = 1 - flag_map[5] = 1 - flag_map[3] = 1 - - if has_25_flag: - flag_map[7] = 1 - - return flag_map - - - -def addCostBasedFlags(flag_map, layer_costs, i): - - median = np.median(layer_costs) - max_cost = 
np.max(layer_costs) - sorted_vals = np.sort(layer_costs) - - print ("**** Median = ", median) - print ("**** Max_cost = ", max_cost) - print ("**** Sorted_vals = ", sorted_vals, "\n\n") - - - if (layer_costs[i] > (median * 1.5)): - flag_map[7] = 1 - - if (layer_costs[i] > (median * 3)) or layer_costs[i] == max_cost: - flag_map[7] = 1 - flag_map[5] = 1 - flag_map[3] = 1 - - - if (layer_costs[i] < (median / 10)): - flag_map = {} - flag_map[12] = 1 - - return flag_map - - - - - -def constructKnobsFile(flags, layer_costs): - - f = open("local_knobs.txt", "w+") - for i in range(len(flags)): - flag_map = flags[i] - - if i > 0: - flag_map = addPromiseFlags(flag_map) - flag = addCostBasedFlags(flag_map, layer_costs, i) - - it = 0 - for flag in flag_map: - f.write(str(flag)) - if it < len(flag_map) - 1: - f.write(",") - it += 1 - - f.write("\n") - - f.close() - - - - - -def runPromiseAlgoBench(Bench): - - # NOTE-IMP: Changing current directory to one with promise binaries - dir_prefix = "../build_tuner/" - - - tuner_runs = Bench.autotuner_runs - - layer_costs = readCostFile(Bench.cost_file) - - flags1, flags2, flags3 = dumpBenchPareto(Bench) - - constructKnobsFile(flags1, layer_costs) - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.8, tuner_runs) - - constructKnobsFile(flags2, layer_costs) - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.6, tuner_runs) - - constructKnobsFile(flags3, layer_costs) - runAlgoTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.2, tuner_runs) - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_autotuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_autotuner.py deleted file mode 100644 index 800bf926a5dc3ac9a8d9cd6d7e6c3dfb5e829585..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_autotuner.py +++ /dev/null @@ -1,403 +0,0 @@ - -import os -import sys -import subprocess -import shutil - -from swing_selection import 
loadLayerDesc -from error_sensitivity import test_sensitivity, test_sensitivity2, test_sensitivity3, test_sensitivity4 -from benchmarks import bench_tuner_data, batch_id -from run_psnr import runPSNRTuner -from run_ha_tuner import runTunerBench -from run_hs_tuner import runPromiseBench -from run_algo_tuner import runAlgoBench -from run_algo_tuner2 import runPromiseAlgoBench -from compute_confs import computePSNRBenchSwings, computeBenchSwings -from validation import runPromiseBenchValidation2, runBenchValidation, runAlgoBenchValidate -from profiling import startProfile, stopProfile, dumpProfiles -from utils import createResultDirs -from benchmarks import batch_id -from run_devtime_tuner import DevTimeTuner - - - -def runTunerValidation(): - - runBenchValidation(bench_tuner_data["mobilenet_shallow"]) - - #runBenchValidation("mobilenet_cifar10") - - #runBenchValidation("alexnet_cifar10") - #runBenchValidation("vgg16_cifar10") - #runBenchValidation("alexnet2_cifar10") - #runBenchValidation("resnet18_cifar10") - #runBenchValidation("vgg16_cifar100") - - -def computeLayerSwings(): - - - computeBenchSwings(bench_tuner_data["mobilenet_shallow"]) - - #computeBenchSwings("mobilenet_cifar10") - - #computeBenchSwings("mobilenet_cifar10") - - #computeBenchSwings("lenet_keras") - #computeBenchSwings("alexnet_cifar10") - #computeBenchSwings("alexnet2_cifar10") - #computePSNRBenchSwings("pipeline_GEOM") - #computePSNRBenchSwings("pipeline_GEMO") - #computePSNRBenchSwings("pipeline_GEO") - #computePSNRBenchSwings("pipeline_GSM") - #computePSNRBenchSwings("pipeline_GSME") - - - - - -def runPromiseTuner(): - - - start = startProfile("MobileNet") - runPromiseBench(bench_tuner_data["mobilenet_cifar10"]) - stopProfile("MobileNet", start) - - start = startProfile("Alexnet") - runPromiseBench(bench_tuner_data["alexnet_cifar10"]) - stopProfile("Alexnet", start) - - start = startProfile("Alexnet2") - runPromiseBench(bench_tuner_data["alexnet2_cifar10"]) - stopProfile("Alexnet2", start) - - 
start = startProfile("VGG16_10") - runPromiseBench(bench_tuner_data["vgg16_cifar10"]) - stopProfile("VGG16_10", start) - - start = startProfile("VGG16_100") - runPromiseBench(bench_tuner_data["vgg16_cifar100"]) - stopProfile("VGG16_100", start) - - start = startProfile("ResNet") - runPromiseBench(bench_tuner_data["resnet18_cifar10"]) - stopProfile("ResNet", start) - - start = startProfile("MobileNet-SH") - runPromiseBench(bench_tuner_data["mobilenet_shallow"]) - stopProfile("MobileNet-SH", start) - - start = startProfile("LeNet") - runPromiseBench(bench_tuner_data["lenet_keras"]) - stopProfile("LeNet", start) - - - - #runPSNRPromiseBench("pipeline_GEOM") - #runPSNRPromiseBench("pipeline_GEMO") - #runPSNRPromiseBench("pipeline_GEO") - #runPSNRPromiseBench("pipeline_GSM") - #runPSNRPromiseBench("pipeline_GSME") - - dumpProfiles("time_profile" + batch_id + ".txt") - - - - -def runPromiseValidation(): - - - start = startProfile("AlexNet") - runPromiseBenchValidation2(bench_tuner_data["alexnet_cifar10"]) - stopProfile("AlexNet", start) - - start = startProfile("AlexNet2") - runPromiseBenchValidation2(bench_tuner_data["alexnet2_cifar10"]) - stopProfile("AlexNet2", start) - - start = startProfile("VGG16_100") - runPromiseBenchValidation2(bench_tuner_data["vgg16_cifar100"]) - stopProfile("VGG16_100", start) - - start = startProfile("VGG16_10") - runPromiseBenchValidation2(bench_tuner_data["vgg16_cifar10"]) - stopProfile("VGG16_10", start) - #runPromiseBenchValidation2(bench_tuner_data["lenet_keras"]) - - start = startProfile("ResNet") - runPromiseBenchValidation2(bench_tuner_data["resnet18_cifar10"]) - stopProfile("ResNet", start) - - start = startProfile("MobileNet_SH") - runPromiseBenchValidation2(bench_tuner_data["mobilenet_shallow"]) - stopProfile("MobileNet_SH", start) - - start = startProfile("MobileNet") - runPromiseBenchValidation2(bench_tuner_data["mobilenet_cifar10"]) - stopProfile("MobileNet", start) - - - dumpProfiles("validation_prof" + batch_id + ".txt") - - 
- - -def runAutotuner(): - - runTunerBench(bench_tuner_data["alexnet_cifar10"]) - runTunerBench(bench_tuner_data["alexnet2_cifar10"]) - - #runTunerBench("mobilenet_shallow") - #runTunerBench("mobilenet_cifar10") - - #runTunerBench("lenet_keras") - #runTunerBench("resnet18_cifar10") - #runTunerBench("vgg16_cifar10") - - #runPSNRTuner("pipeline_GEOM") - #runPSNRTuner("pipeline_GEMO") - #runPSNRTuner("pipeline_GEO") - #runPSNRTuner("pipeline_GSM") - #runPSNRTuner("pipeline_GSME") - - - - -def runSensAnalysis(): - - start = startProfile("LeNet") - test_sensitivity4(bench_tuner_data["lenet_keras"]) - stopProfile("LeNet", start) - - """ - start = startProfile("AlexNet") - test_sensitivity4(bench_tuner_data["alexnet_cifar10"]) - stopProfile("AlexNet", start) - - start = startProfile("AlexNet2") - test_sensitivity4(bench_tuner_data["alexnet2_cifar10"]) - stopProfile("AlexNet2", start) - - start = startProfile("ResNet") - test_sensitivity4(bench_tuner_data["resnet18_cifar10"]) - stopProfile("ResNet", start) - - start = startProfile("MobileNet") - test_sensitivity4(bench_tuner_data["mobilenet_cifar10"]) - stopProfile("MobileNet", start) - - start = startProfile("MobileNet_SH") - test_sensitivity4(bench_tuner_data["mobilenet_shallow"]) - stopProfile("MobileNet_SH", start) - - start = startProfile("VGG_10") - test_sensitivity4(bench_tuner_data["vgg16_cifar10"]) - stopProfile("VGG16_10", start) - - start = startProfile("VGG_100") - test_sensitivity4(bench_tuner_data["vgg16_cifar100"]) - stopProfile("VGG16_100", start) - - dumpProfiles("sens_time_prof.txt") - - """ - - start = startProfile("LeNet") - test_sensitivity3(bench_tuner_data["lenet_keras"]) - stopProfile("LeNet", start) - - start = startProfile("AlexNet") - test_sensitivity3(bench_tuner_data["alexnet_cifar10"]) - stopProfile("AlexNet", start) - - start = startProfile("AlexNet2") - test_sensitivity3(bench_tuner_data["alexnet2_cifar10"]) - stopProfile("AlexNet2", start) - - start = startProfile("ResNet") - 
test_sensitivity3(bench_tuner_data["resnet18_cifar10"]) - stopProfile("ResNet", start) - - - start = startProfile("MobileNet") - test_sensitivity3(bench_tuner_data["mobilenet_cifar10"]) - stopProfile("MobileNet", start) - - start = startProfile("MobileNet_SH") - test_sensitivity3(bench_tuner_data["mobilenet_shallow"]) - stopProfile("MobileNet_SH", start) - - start = startProfile("VGG_10") - test_sensitivity3(bench_tuner_data["vgg16_cifar10"]) - stopProfile("VGG16_10", start) - - start = startProfile("VGG_100") - test_sensitivity3(bench_tuner_data["vgg16_cifar100"]) - stopProfile("VGG16_100", start) - - dumpProfiles("sens_time_prof.txt") - - - """ - test_sensitivity2(bench_tuner_data["fc4"]) - test_sensitivity2(bench_tuner_data["lenet_keras"]) - test_sensitivity2(bench_tuner_data["mobilenet_cifar10"]) - test_sensitivity2(bench_tuner_data["mobilenet_shallow"]) - test_sensitivity2(bench_tuner_data["resnet18_cifar10"]) - test_sensitivity2(bench_tuner_data["alexnet_cifar10"]) - test_sensitivity2(bench_tuner_data["alexnet2_cifar10"]) - test_sensitivity2(bench_tuner_data["vgg16_cifar10"]) - test_sensitivity2(bench_tuner_data["vgg16_cifar100"]) - - - test_sensitivity(bench_tuner_data["fc4"]) - test_sensitivity(bench_tuner_data["lenet_keras"]) - test_sensitivity(bench_tuner_data["mobilenet_cifar10"]) - test_sensitivity(bench_tuner_data["mobilenet_shallow"]) - test_sensitivity(bench_tuner_data["resnet18_cifar10"]) - test_sensitivity(bench_tuner_data["alexnet_cifar10"]) - test_sensitivity(bench_tuner_data["alexnet2_cifar10"]) - test_sensitivity(bench_tuner_data["vgg16_cifar10"]) - test_sensitivity(bench_tuner_data["vgg16_cifar100"]) - """ - - - - - - -def runAlgoTuner(): - - Bench = bench_tuner_data["alexnet_cifar10"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["vgg16_cifar10"] - runAlgoBench(Bench) - - Bench = 
bench_tuner_data["lenet_keras"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - runAlgoBench(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - runAlgoBench(Bench) - - - - - -def runPromiseAlgoTuner(): - - Bench = bench_tuner_data["alexnet_cifar10"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["vgg16_cifar10"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["lenet_keras"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - runPromiseAlgoBench(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - runPromiseAlgoBench(Bench) - - - - - - - -def runAlgoTunerValidation(): - - Bench = bench_tuner_data["alexnet_cifar10"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["mobilenet_shallow"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["mobilenet_cifar10"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["vgg16_cifar10"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["lenet_keras"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["alexnet2_cifar10"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["vgg16_cifar100"] - runAlgoBenchValidate(Bench) - - Bench = bench_tuner_data["resnet18_cifar10"] - runAlgoBenchValidate(Bench) - - - - - -# Invoke Dev-time Autotuner -def runDevTimeBenchs(): - - Bench = bench_tuner_data["lenet_keras"] - - lenetTuner = DevTimeTuner(Bench) - lenetTuner.runDevTuner() - - - - - -if __name__ == "__main__": - - createResultDirs(bench_tuner_data) - - - #-- runAutotuner() - - - #runTunerValidation() - - #computeLayerSwings() - - #runPromiseTuner() - - #runAlgoTuner() - - - runDevTimeBenchs() - - - #--- 
runPromiseAlgoTuner() - - - - #runAlgoTunerValidation() - - #runPromiseValidation() - - #runSensAnalysis() - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_devtime_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_devtime_tuner.py deleted file mode 100644 index 0c701714f2bfc57466c396c9c9a2522d954cb701..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_devtime_tuner.py +++ /dev/null @@ -1,311 +0,0 @@ - - -import os -import sys -import subprocess -import shutil -import time -from benchmarks import batch_id -import utils -import global_paths -import pareto_utils -import buildRtConfig -import genPlots - - - -class DevTimeTuner: - - def __init__(self, Bench): - - self.piped_execution = True - self.autotuner_runs = 8000 - - self.promise_binary = Bench.promise_binary - - if self.piped_execution: - self.binary_path = Bench.piped_binary - else: - self.binary_path = Bench.promise_binary - - self.num_layers = Bench.num_layers - self.gold_accuracy = Bench.promise_accuracy - self.cost_file = global_paths.tensorRT_dir + "/" + Bench.cost_file - self.layer_file = global_paths.tensorRT_dir + "/" + Bench.layer_file - #self.layer_knobs = global_paths.tensorRT_dir + "/" + Bench.layer_knobs - - global_knobs_file = global_paths.tensorRT_dir + "/autotuner/data/global_knobs.txt" - buildRtConfig.initializeApproxMap(global_knobs_file) # Initialize knobs - configfile gen - utils.createDevKnobs(self.layer_file, global_knobs_file, "dev_knobs.txt") - self.layer_knobs = "dev_knobs.txt" - - self.result_dir = global_paths.tensorRT_dir + "/" + Bench.base_dir + \ - "/loss_123/" + batch_id + "/dev_tuner/" - - # NOTE: maintains total iterations completed - across multiple invocations - self.iterations_completed = 0 - # Start time for timing autotuner runs - self.start_time = 0 - - - - - def invokeDevTunerScript(self, accuracy_slack, \ - additional_error_slack, autotuner_runs): - - 
accuracy_threshold = self.gold_accuracy - accuracy_slack - accuracy_additional_slack = self.gold_accuracy - additional_error_slack - - tuner_cmd = "python2 " + global_paths.opentuner_src_dir + "/devtuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(self.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += self.binary_path - tuner_cmd += " --num-layers " - tuner_cmd += str(self.num_layers) - tuner_cmd += " --result-dir " - tuner_cmd += self.result_dir - tuner_cmd += " --accuracy " - tuner_cmd += str(accuracy_threshold) - tuner_cmd += " --accuracy-slack " - tuner_cmd += str(accuracy_additional_slack) - tuner_cmd += " --cost-file " - tuner_cmd += self.cost_file - tuner_cmd += " --knobs-config " - tuner_cmd += global_paths.tensorRT_dir + "/autotuner/data/global_knobs.txt" - ### tuner_cmd += "../autotuner/data/global_knobs.txt" - tuner_cmd += " --layer-knobs " - tuner_cmd += self.layer_knobs - tuner_cmd += " --start-id " - tuner_cmd += str(self.iterations_completed) - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - # Update iterations completed after each completed devtuner.py invocation with N iterations - self.iterations_completed += self.autotuner_runs - - - - def checkExistingDir(self): - - files_dir = self.result_dir + "/high_confidence/" - if os.path.exists(files_dir) and len(os.listdir(files_dir)) >= 1: - print ("result_dir = ", files_dir, " has existing files \n ") - user_str = input("Enter 'yes' to override - Enter 'no' to skip and exit \n ") - if user_str != "yes": - print ("\n\n NOTE:Exiting \n\n") - sys.exit(0) - - - - def dumpBestConfig(self, configurations): - - best_conf_path = self.result_dir + "/best_config.txt" - conf_file = open(best_conf_path, "w+") - - sorted_configurations = sorted(configurations, key=lambda conf: conf.speedup) - - if len(sorted_configurations) > 0: - best_conf = sorted_configurations[-1] - - conf_file.write("speedup = " + str(best_conf.speedup) + \ - " avg_loss = " + 
str(best_conf.avg_loss) + "\n") - - for flag in best_conf.flags: - conf_file.write(str(flag) + "\n") - - conf_file.close() - - - - def dumpAllConfigurations(self): - - input_dir = self.result_dir + "/high_confidence/" - - configurations = buildRtConfig.loadConfigsFromDir(input_dir, self.gold_accuracy) - bench_layer_composition = utils.getLayerComposition(self.layer_file) - - sorted_configurations = sorted(configurations, key=lambda conf: conf.speedup) - - # Adding an extra loss to tuned configurations - adjusting for unseen data - buildRtConfig.adjustConfigLosses(sorted_configurations) - - - config_out_path = self.result_dir + "dev_gpu_all_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "gpu") - - config_out_path = self.result_dir + "dev_cpu_all_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "cpu") - - - plot_file_path = self.result_dir + "dev_all_conf_plot.png" - genPlots.genScatterPlotFromConfigs(sorted_configurations, plot_file_path) - - - self.dumpBestConfig(sorted_configurations) - - - - def dumpBandPareto(self): - - input_dir = self.result_dir + "/high_confidence/" - output_dir = self.result_dir + "/pareto/" - utils.createDir(output_dir) - - configurations = pareto_utils.dumpParetoConfigsToDir(input_dir, \ - output_dir, self.gold_accuracy, True) - - config_out_path = self.result_dir + "dev_pareto_config.txt" - bench_layer_composition = utils.getLayerComposition(self.layer_file) - - #-- sorted_configurations = sorted(configurations, key=lambda conf: conf.avg_loss) - sorted_configurations = sorted(configurations, key=lambda conf: conf.speedup) - - # Adding an extra loss to tuned configurations - adjusting for unseen data - buildRtConfig.adjustConfigLosses(sorted_configurations) - - - config_out_path = self.result_dir + "dev_gpu_pareto_config.txt" - - 
buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "gpu") - - config_out_path = self.result_dir + "dev_cpu_pareto_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "cpu") - - plot_file_path = self.result_dir + "dev_pareto_plot.png" - genPlots.genScatterPlotFromConfigs(sorted_configurations, plot_file_path) - - - - def dumpTruePareto(self): - - input_dir = self.result_dir + "/high_confidence/" - output_dir = self.result_dir + "/true_pareto/" - utils.createDir(output_dir) - - # NOTE: This is a true pareto curve construction - configurations = pareto_utils.dumpParetoConfigsToDir(input_dir, \ - output_dir, self.gold_accuracy, False) - - config_out_path = self.result_dir + "true_pareto_config.txt" - bench_layer_composition = utils.getLayerComposition(self.layer_file) - - sorted_configurations = sorted(configurations, key=lambda conf: conf.avg_loss) - - # Adding an extra loss to tuned configurations - adjusting for unseen data - buildRtConfig.adjustConfigLosses(sorted_configurations) - - - config_out_path = self.result_dir + "true_gpu_pareto_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "gpu") - - config_out_path = self.result_dir + "true_cpu_pareto_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "cpu") - - - plot_file_path = self.result_dir + "true_pareto_plot.png" - genPlots.genScatterPlotFromConfigs(sorted_configurations, plot_file_path) - - - - - def dumpParetoFiles(self): - - self.dumpBandPareto() - self.dumpTruePareto() - - - - def dumpReferenceFiles(self): - - ref_dir = self.result_dir + "/references/" - utils.createDir(ref_dir) - - sources = {"run_devtime_tuner.py", "benchmarks.py", \ - 
"buildRtConfig.py", "global_paths.py"} - - for src in sources: - src_path = global_paths.tensorRT_dir + "/autotuner/tuner_driver_src/" + src - dst_path = ref_dir + "/" + src - shutil.copy(src_path, dst_path) - - - data_files = {self.cost_file, self.layer_file, self.layer_knobs} - - for datafile in data_files: - shutil.copy(datafile, ref_dir) - - - - def setBaselineAccuracy(self): - - self.gold_accuracy = utils.getBaselineAccuracy(self.promise_binary, self.num_layers) - print ("NOTE: Baseline Accuracy = ", self.gold_accuracy, "\n\n") - - - - def startTimer(self): - self.start_time = time.time() - print ("\n\n ---Starting DevTuner Timer ----- \n\n") - - - - def endTimer(self): - end_time = time.time() - total_tuning_time = end_time - self.start_time - time_hrs = total_tuning_time * 1.0 / (60 * 60) - print ("\n\n --- Time In Hours = ", time_hrs, " \n\n") - - time_file_path = self.result_dir + "tuning_time.txt" - - f = open(time_file_path, "w+") - f.write("time_hrs = " + str(time_hrs) + "\n") - f.close() - - - - - def runDevTuner(self): - - #self.checkExistingDir() - - - self.startTimer() - - self.setBaselineAccuracy() - - self.invokeDevTunerScript(0.8, 2.1, self.autotuner_runs) - self.invokeDevTunerScript(1.5, 2.1, self.autotuner_runs) - self.invokeDevTunerScript(2.1, 2.1, self.autotuner_runs) - - self.dumpParetoFiles() - self.dumpAllConfigurations() - - - # NOTE: dumping files for checking experimental parameters for each batch - self.dumpReferenceFiles() - - - self.endTimer() diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_ha_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_ha_tuner.py deleted file mode 100644 index 055d2c4c1bde6bf02e080c53101f03dc1791fd9e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_ha_tuner.py +++ /dev/null @@ -1,52 +0,0 @@ - - - -import subprocess - - - -#, bench_name -def runTunerBench(Bench): - - #Bench = bench_tuner_data[bench_name] - 
- #FIXIT: Replace approxhpvm_tuner2 with approxhpvm_tuner - tuner_cmd = "python ../opentuner/autotuner/approxhpvm_tuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.tuner_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_flags) - tuner_cmd += " --error-range " - tuner_cmd += str(Bench.error_range_2) - tuner_cmd += " --result-dir " - tuner_cmd += Bench.result_dir_2 - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.tuner_accuracy - 1.70) - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - tuner_cmd = "python ../opentuner/autotuner/approxhpvm_tuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.tuner_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_flags) - tuner_cmd += " --error-range " - tuner_cmd += str(Bench.error_range_1) - tuner_cmd += " --result-dir " - tuner_cmd += Bench.result_dir_1 - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.tuner_accuracy - 0.85) - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_hs_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_hs_tuner.py deleted file mode 100644 index f1a9c8f417bafdf4084a687670074101bec3faa0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_hs_tuner.py +++ /dev/null @@ -1,185 +0,0 @@ - - -import os -import subprocess -from error_sensitivity import select_skip_layers - - -def runPromiseTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_runs, skip_layers): - - tuner_cmd = "python2 ../opentuner/autotuner/promise_tuner3.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " 
--num-flags " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --start-range " - tuner_cmd += str(Bench.start_promise_range) - tuner_cmd += " --error-range " - #tuner_cmd += str(10) - # NOTE: Increasing flags from ApproxTechiqueTuner - tuner_cmd += str(12) - tuner_cmd += " --result-dir " - tuner_cmd += dir_prefix - tuner_cmd += result_dir + "/promise_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy - acc_threshold) - tuner_cmd += " --layer-file " - tuner_cmd += dir_prefix - tuner_cmd += Bench.tensor_desc_file - # NOTE: Cost file is new addition - ***NOT*** present in promisetuner1 and promisetuner2 - tuner_cmd += " --cost-file " - tuner_cmd += Bench.cost_file - #tuner_cmd += " --gpu-layers " - #tuner_cmd += str(Bench.skip_layers) - tuner_cmd += " --gpu-layers 0 " - tuner_cmd += " --skip-layers \"" - #tuner_cmd += str(Bench.skip_layer_str) + "\"" - tuner_cmd += str(skip_layers) + "\"" - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - - -def promiseTunerLoss1(Bench, dir_prefix): - - tuner_runs = Bench.autotuner_runs - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 30) - skip_layers3 = "0_" + select_skip_layers(Bench, 50) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers3) - - -def promiseTunerLoss2(Bench, dir_prefix): - - tuner_runs = Bench.autotuner_runs - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 20) - skip_layers3 = "0_" + select_skip_layers(Bench, 40) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, 
skip_layers3) - - - -def promiseTunerLoss3(Bench, dir_prefix): - - tuner_runs = Bench.autotuner_runs - - skip_layers1 = "0" - skip_layers2 = "0_" + select_skip_layers(Bench, 10) - skip_layers3 = "0_" + select_skip_layers(Bench, 30) - - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers1) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers2) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers3) - - -BASELINE = True - - -def runPromiseBench(Bench): - - # NOTE-IMP: Changing current directory to one with promise binaries - dir_prefix = "../build_tuner/" - - - if BASELINE: - tuner_runs = Bench.autotuner_runs * 2 - skip_layers = "0" - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers) - runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers) - - else: - - promiseTunerLoss1(Bench, dir_prefix) - - promiseTunerLoss2(Bench, dir_prefix) - - promiseTunerLoss3(Bench, dir_prefix) - - - - - """ - #tuner_cmd = "python ../opentuner/autotuner/promise_tuner2.py " - tuner_cmd = "python ../opentuner/autotuner/promise_tuner3.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --start-range " - tuner_cmd += str(Bench.start_promise_range) - tuner_cmd += " --error-range " - #tuner_cmd += str(10) - tuner_cmd += " --result-dir " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.result_dir_2 + "/promise_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy - 1.90) - tuner_cmd += " --layer-file " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.tensor_desc_file - # NOTE: Cost file is new addition - ***NOT*** present in 
promisetuner1 and promisetuner2 - tuner_cmd += " --cost-file " - tuner_cmd += Bench.cost_file - #tuner_cmd += " --gpu-layers " - #tuner_cmd += str(Bench.skip_layers) - tuner_cmd += " --gpu-layers 0 " - tuner_cmd += " --skip-layers \"" - tuner_cmd += str(Bench.skip_layer_str) + "\"" - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - #tuner_cmd = "python ../opentuner/autotuner/promise_tuner2.py " - tuner_cmd = "python ../opentuner/autotuner/promise_tuner3.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --start-range " - tuner_cmd += str(Bench.start_promise_range) - tuner_cmd += " --error-range " - tuner_cmd += str(10) - tuner_cmd += " --result-dir " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.result_dir_1 + "/promise_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy - 0.95) - tuner_cmd += " --layer-file " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.tensor_desc_file - tuner_cmd += " --cost-file " - tuner_cmd += Bench.cost_file - #tuner_cmd += " --gpu-layers " - #tuner_cmd += str(Bench.skip_layers) - tuner_cmd += " --gpu-layers 0 " - tuner_cmd += " --skip-layers \"" - tuner_cmd += str(Bench.skip_layer_str) + "\"" - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - """ diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_install_tuner.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_install_tuner.py deleted file mode 100644 index 6fe682eb4eb715ce3dd290ef77f20a06e5e18856..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_install_tuner.py +++ /dev/null @@ -1,164 +0,0 @@ - - -import os -import sys -import subprocess -import shutil -import time -from benchmarks import batch_id -import 
utils -import global_paths -import pareto_utils -import buildRtConfig -import genPlots -from run_devtime_tuner import DevTimeTuner -import validation -import knob_pruning - - -class InstallTimeTuner(DevTimeTuner): - - def __init__(self, Bench): - - self.knob_pruning = True - self.piped_execution = True - self.autotuner_runs = 10000 # Bench.autotuner_runs - self.validation_runs = 15 - self.abort_after = 3 - self.conf_threshold = 100 - - self.promise_binary = Bench.promise_binary - - if self.piped_execution: - self.binary_path = Bench.piped_binary - else: - self.binary_path = Bench.promise_binary - - self.num_layers = Bench.num_layers - #self.gold_accuracy = Bench.promise_accuracy - self.setBaselineAccuracy() - - - self.cost_file = global_paths.tensorRT_dir + "/" + Bench.cost_file - self.layer_file = global_paths.tensorRT_dir + "/" + Bench.layer_file - - - global_knobs_file = global_paths.tensorRT_dir + "/autotuner/data/global_knobs.txt" - buildRtConfig.initializeApproxMap(global_knobs_file) # Initialize knobs - configfile gen - - - if self.knob_pruning == False: - utils.createInstallAndDevKnobs(self.layer_file, global_knobs_file, "install_knobs.txt") - - elif self.knob_pruning == True: - pruned_knobs = knob_pruning.getPrunedKnobs(self.promise_binary, self.layer_file, \ - global_knobs_file, self.gold_accuracy, 3) - - print ("*** pruned_knobs = ", pruned_knobs) - utils.dumpKnobsFile(pruned_knobs, "install_knobs.txt") - - - - self.layer_knobs = "install_knobs.txt" - - self.result_dir = global_paths.tensorRT_dir + "/" + Bench.base_dir + \ - "/loss_123/" + batch_id + "/install_tuner/" - - # NOTE: maintains total iterations completed - across multiple invocations - self.iterations_completed = 0 - # Start time for timing autotuner runs - self.start_time = 0 - - - - def validateAccuracyConfigs(self, configurations, accuracy_slack): - - filtered_configs = [] - for config in configurations: - flags = config.flags - avg_acc, confidence = 
validation.getStatisticalConfidence(self.promise_binary, flags, \ - self.gold_accuracy, \ - accuracy_slack, self.validation_runs, \ - self.abort_after) - - print ("avg_acc, confidence = ", avg_acc, confidence) - - - if confidence >= self.conf_threshold: - config.avg_accuracy = avg_acc - filtered_configs.append(config) - - return filtered_configs - - - - - def dumpValidatedConfigs(self, accuracy_slack): - - #input_dir = self.result_dir + "/high_confidence/" - input_dir = self.result_dir + "/high_confidence/" - output_dir = self.result_dir + "/pareto/" - utils.createDir(output_dir) - - # Get Pareto Points with a "BAND" -- enable_band below is 'True' - configurations = pareto_utils.dumpParetoConfigsToDir(input_dir, \ - output_dir, self.gold_accuracy, True) - - print ("**** pareto config count = ", len(configurations), "\n") - time.sleep(10) - - #configurations = buildRtConfig.loadConfigsFromDir(input_dir, self.gold_accuracy) - bench_layer_composition = utils.getLayerComposition(self.layer_file) - - - filtered_configs = self.validateAccuracyConfigs(configurations, accuracy_slack) - - - sorted_configurations = sorted(filtered_configs, key=lambda conf: conf.speedup) - - - config_out_path = self.result_dir + "install_gpu_all_config.txt" - - buildRtConfig.dumpDevConfigsToRTFile(sorted_configurations, \ - config_out_path, bench_layer_composition, \ - self.gold_accuracy, "gpu") - - - plot_file_path = self.result_dir + "install_all_conf_plot.png" - genPlots.genScatterPlotFromConfigs(sorted_configurations, plot_file_path) - - - self.dumpBestConfig(sorted_configurations) - - - - - def runDevTuner(self): - - #self.checkExistingDir() - - - self.startTimer() - - self.setBaselineAccuracy() - - #self.invokeDevTunerScript(0.8, 2.1, self.autotuner_runs) - #self.invokeDevTunerScript(1.5, 2.1, self.autotuner_runs) - #self.invokeDevTunerScript(2.1, 2.1, self.autotuner_runs) - #self.invokeDevTunerScript(0.9, 2.1, self.autotuner_runs) - - # NOTE: for purposes of comparison with fedtuning - 
self.invokeDevTunerScript(2.1, 2.1, self.autotuner_runs) - - - #--- self.dumpParetoFiles() - self.dumpValidatedConfigs(2.1) - - - # NOTE: dumping files for checking experimental parameters for each batch - self.dumpReferenceFiles() - - - self.endTimer() - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_psnr.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_psnr.py deleted file mode 100644 index 77e70609b89f200e37af1a12348874f9d447d0cd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/run_psnr.py +++ /dev/null @@ -1,143 +0,0 @@ - - -import subprocess - - -def gen30dbFile(): - - f = open("psnr.txt", "w+"); - f.write("30"); - f.close() - - -def gen20dbFile(): - - f = open("psnr.txt", "w+"); - f.write("20"); - f.close() - - - -def runPSNRTuner(bench_name): - - Bench = bench_tuner_data[bench_name] - - # 20DB run - gen20dbFile() - tuner_cmd = "python ../opentuner/autotuner/approxhpvm_tuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.tuner_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_flags) - tuner_cmd += " --error-range " - tuner_cmd += str(Bench.error_range_2) - tuner_cmd += " --result-dir " - tuner_cmd += Bench.result_dir_2 - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.tuner_accuracy) - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - # 30DB run - gen30dbFile() - tuner_cmd = "python ../opentuner/autotuner/approxhpvm_tuner.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.tuner_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_flags) - tuner_cmd += " --error-range " - tuner_cmd += str(Bench.error_range_1) - tuner_cmd += " --result-dir " - tuner_cmd += Bench.result_dir_1 - tuner_cmd += " --accuracy " - tuner_cmd += 
str(Bench.tuner_accuracy) - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - -def runPSNRPromiseBench(bench_name): - - # NOTE-IMP: Changing current directory to one with promise binaries - #os.chdir("../build_promise/") - result_dir_prefix = "../build_tuner/" - - Bench = bench_tuner_data[bench_name] - - # 20db Run - gen20dbFile() - tuner_cmd = "python ../opentuner/autotuner/promise_tuner2.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --start-range " - tuner_cmd += str(Bench.start_promise_range) - tuner_cmd += " --error-range " - tuner_cmd += str(10) - tuner_cmd += " --result-dir " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.result_dir_2 + "/promise_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy) - tuner_cmd += " --layer-file " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.tensor_desc_file - tuner_cmd += " --gpu-layers 0 " - tuner_cmd += " --skip-layers \"" - tuner_cmd += str(Bench.skip_layer_str) + "\"" - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - # 30DB run - gen30dbFile() - tuner_cmd = "python ../opentuner/autotuner/promise_tuner2.py " - tuner_cmd += " --test-limit " - tuner_cmd += str(Bench.autotuner_runs) - tuner_cmd += " --binary ./" - tuner_cmd += Bench.promise_binary - tuner_cmd += " --num-flags " - tuner_cmd += str(Bench.num_layers) - tuner_cmd += " --start-range " - tuner_cmd += str(Bench.start_promise_range) - tuner_cmd += " --error-range " - tuner_cmd += str(10) - tuner_cmd += " --result-dir " - tuner_cmd += result_dir_prefix - tuner_cmd += Bench.result_dir_1 + "/promise_tuner/" - tuner_cmd += " --accuracy " - tuner_cmd += str(Bench.promise_accuracy) - tuner_cmd += " --layer-file " - tuner_cmd += result_dir_prefix - tuner_cmd += 
Bench.tensor_desc_file - tuner_cmd += " --gpu-layers 0 " - tuner_cmd += " --skip-layers \"" - tuner_cmd += str(Bench.skip_layer_str) + "\"" - - - print (tuner_cmd) - - p = subprocess.Popen(tuner_cmd, shell=True) - p.wait() - - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection.py deleted file mode 100644 index 399143c357c618aeba1665f5f1b8ecda4097d84c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection.py +++ /dev/null @@ -1,304 +0,0 @@ - - -import os -import warnings -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from matplotlib.ticker import MultipleLocator -import numpy as np -from scipy.signal import savgol_filter -import math -import struct - - - -def readDataFromText(textFile): - results = [] - with open(textFile, "r") as f: - for line in f: - token = line.split("\t") - if (len(token) < 7): - continue - record = (token[0], float(token[1]), float(token[5]), float(token[6])) - results.append(record) - return results - - -convL1bins = [(0.985901, 1.36474), (0.852871, 1.16982), (0.422283, 0.55701), (0.259752, 0.335259), (0.216577, 0.277843), (0.185812, 0.23733), (0.148996, 0.189171), (0.100007, 0.125816), (0.0003127876261714846, 0.014511194080114365)] -convL2bins = [(0.995298, 1.3643), (0.861066, 1.16279), (0.426857, 0.547827), (0.262645, 0.330186), (0.218984, 0.273731), (0.187878, 0.233872), (0.150619, 0.186512), (0.10106, 0.124477), (0.00035427528200671077, 0.020199092105031013)] - -biasL1bins = [(0.3510325849056244, 0.49078235030174255), (0.30895063281059265, 0.4311973750591278), (0.16023841500282288, 0.22283604741096497), (0.099583700299263, 0.1381179839372635), (0.08340170979499817, 0.11503150314092636), (0.07280077040195465, 0.09948030859231949), (0.05857400223612785, 0.07965542376041412), (0.04044099152088165, 0.054193537682294846), (0.0, 0.0)] -biasL2bins = 
[(0.4154910147190094, 0.5820578932762146), (0.3656001389026642, 0.5121639370918274), (0.18930286169052124, 0.2637346684932709), (0.11687946319580078, 0.16306844353675842), (0.09796475619077682, 0.13558265566825867), (0.0848352462053299, 0.11619425565004349), (0.06783176958560944, 0.09277229756116867), (0.046059850603342056, 0.062238890677690506), (0.0, 0.0)] - -gemmL1bins= [(0.711203, 0.772211), (0.625894, 0.679601), (0.322665, 0.350383), (0.199646, 0.216727), (0.166556, 0.180781), (0.142945, 0.155132), (0.114662, 0.124399), (0.0771065, 0.0835984), (0.00034660729579627514, 0.008546584285795689)] -gemmL2bins= [(0.715208, 0.768102), (0.629411, 0.675947), (0.324433, 0.348358), (0.200659, 0.21539), (0.167381, 0.179634), (0.143637, 0.154119), (0.115197, 0.123548), (0.0774642, 0.0829647), (0.0003496285935398191, 0.009841435588896275)] - - - -def findBinByOp(op): - if op == 'tensorConv': - return convL1bins, convL2bins - if op == 'tensorAdd': - return biasL1bins, biasL2bins - if op == 'tensorGemm': - return gemmL1bins, gemmL2bins - - return None, None - - -def getSwing(Lx, opLxbin): - if opLxbin == None: - return 0 - for i, (minT, maxT) in enumerate(opLxbin): - if Lx > minT: - return i - - return 9 - - - -def getConfiguration(L_thresholds): - configuration = [] - for l in L_thresholds: - # L0 is op_type - opL1bin, opL2bin = findBinByOp(l[0]) - # NOTE: L2 is L1 error, L3 is L2 error - sL1 = getSwing(l[2], opL1bin) - sL2 = getSwing(l[3], opL2bin) - if sL1 < 7: - sL1 = sL1 + 1 - if sL2 < 7: - sL2 = sL2 + 1 - configuration.append((l[0], l[1], l[2], l[3], sL1, sL2, max(sL1, sL2))) - - return configuration - - -def displayConfig(config): - for c in config: - print(c) - -def displayMultipleConfigurations(configurations): - for f, c in configurations.items(): - print(f) - displayConfig(c) - print() - -def getConfigFromFile(filename): - L_requirements = readDataFromText(filename) - config = getConfiguration(L_requirements) - return config - - -def 
getConfigurationsFromDir(dirname): - configurations = dict() - for f in os.listdir(dirname): - configurations[f] = getConfigFromFile(os.path.join(dirname, f)) - - return configurations - - -def getLayerWiseTarget(config): - target = [] - for i, op in enumerate(config): - if (op[0] == 'tensorGemm') or (op[0] == 'tensorConv'): - t = op[6] - for j in range(i+1, len(config)): - if config[j][0] == 'tensorGemm' or config[j][0] == 'tensorConv': - break - t = max(t, config[j][6]) - target.append(t) - t = 0 - - return target - - -def dumpLayerWiseTarget(file, targets): - with open(file, "w") as f: - for name, t in targets.items(): - f.write(name) - f.write(" ") - for i in t: - f.write(str(i)) - f.write(" ") - f.write("\n") - - -def getTargetsFromConfigurations(configs): - targets = dict() - for f, c in configs.items(): - targets[f] = [d[6] for d in c] - - return targets - - -def dumpBenchmarkTargets(name, benchmark_dir): - benchmark_targets = dict() - error = ['linear', 'log', 'quad'] - for e in error: - results_dir = os.path.join(benchmark_dir, e) - configs = getConfigurationsFromDir(results_dir) - benchmark_targets[e] = getTargetsFromConfigurations(configs) - - return benchmark_targets - - - -def dumpTargets(filename, targets): - with open(filename, "w") as f: - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - for c in config: - f.write(str(c)) - f.write(" ") - f.write("\n") - - - -def getLayerSwings(layer_desc, configurations): - - layer_swings = [] - for i in range(len(configurations)): - config_vals = configurations[i] - if len(config_vals) == 0: - continue - - layer_index = 0 - index = 0 - swing_vals = [] - - while layer_index < len(layer_desc): - if len(layer_desc[layer_index]) == 1: - promise_swing = config_vals[index] - layer_type = layer_desc[layer_index][0] - layer_type = layer_type.strip() - print ("****layer_type = ", layer_type) - if layer_type != "conv" and layer_type != "dense": - promise_swing = -9 - if layer_type == 
"depthwise_conv": - promise_swing = -9 - index += 1 - else: - #print ("index = ", index) - # FIXIT: Doesn't look right - print (config_vals[index], config_vals[index+1]) - promise_swing = max(config_vals[index], config_vals[index+1]) - stride = len(layer_desc[layer_index]) - index += stride - - swing_vals.append(promise_swing) - layer_index += 1 - - layer_swings.append(swing_vals) - - return layer_swings - - - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -def dumpLayerTargets(targets, tuned_result_dir, layer_desc_file): - - layer_desc = loadLayerDesc(layer_desc_file) - print (layer_desc) - - file_names = [] - configurations = [] - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - config_vals = [] - for c in config: - config_vals.append(c) - print (config_vals) - - configurations.append(config_vals) - - rank = e + "_" + "_".join(name.split("_")[-2:]) - file_names.append(rank) - - - # NOTE: get PROMISE swing values corresponding to each layer - layer_swings = getLayerSwings(layer_desc, configurations) - - targets_file_path = tuned_result_dir + "/layer_targets.txt" - f = open(targets_file_path, "w+") - - for config in layer_swings: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - - f.write(swing_str) - index += 1 - - f.write("\n") - - f.close() - - print(layer_swings) - return layer_swings, file_names - - - -def replaceFirstLayer(layer_swings): - - # Ensuring first conv on GPU - for conf in layer_swings: - conf[0] = 9 - - - -def computeLayerTargets(tuned_result_dir, layer_desc_file): - - targets_file_path = 
tuned_result_dir + "/tensor_targets.txt" - targets = dumpBenchmarkTargets(targets_file_path, tuned_result_dir) - - dumpTargets(targets_file_path, targets) - - layer_swings, file_names = dumpLayerTargets(targets, tuned_result_dir, layer_desc_file) - - replaceFirstLayer(layer_swings) - - return layer_swings, file_names - - -# Externally-called function -def compute_swing_selection(tuned_result_dir, layer_file): - - return computeLayerTargets(tuned_result_dir, layer_file) - - - - -if __name__ == "__main__": - - tuned_result_dir = "./vgg16_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition.txt" - - tuned_result_dir = "./resnet18_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition2.txt" - computeLayerTargets(tuned_result_dir, layer_file) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection2.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection2.py deleted file mode 100644 index 588edad2a289a67d30c1ade15d4737556327f4fb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/swing_selection2.py +++ /dev/null @@ -1,289 +0,0 @@ - - -import os -import warnings -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from matplotlib.ticker import MultipleLocator -import numpy as np -from scipy.signal import savgol_filter -import math -import struct - - - -def readDataFromText(textFile): - results = [] - with open(textFile, "r") as f: - for line in f: - token = line.split("\t") - if (len(token) < 7): - continue - record = (token[0], float(token[1]), float(token[5]), float(token[6])) - results.append(record) - return results - - -convL1bins = [(0.985901, 1.36474), (0.852871, 1.16982), (0.422283, 0.55701), (0.259752, 0.335259), (0.216577, 0.277843), (0.185812, 0.23733), (0.148996, 0.189171), (0.100007, 0.125816), (0.0003127876261714846, 0.014511194080114365)] -convL2bins = [(0.995298, 1.3643), (0.18, 0.19), (0.14, 0.16), (0.11, 0.12), 
(0.08, 0.09), (0.06, 0.07), (0.04, 0.05), (0.029, 0.035), (0.00031427528200671077, 0.020199092105031013)] -#convL2bins = [(0.995298, 1.3643), (0.18, 0.19), (0.14, 0.16), (0.11, 0.12), (0.08, 0.09), (0.06, 0.07), (0.04, 0.05), (0.001, 0.004), (0.00031427528200671077, 0.020199092105031013)] - -biasL1bins = [(0.3510325849056244, 0.49078235030174255), (0.30895063281059265, 0.4311973750591278), (0.16023841500282288, 0.22283604741096497), (0.099583700299263, 0.1381179839372635), (0.08340170979499817, 0.11503150314092636), (0.07280077040195465, 0.09948030859231949), (0.05857400223612785, 0.07965542376041412), (0.04044099152088165, 0.054193537682294846), (0.0, 0.0)] -biasL2bins = [(0.4154910147190094, 0.5820578932762146), (0.3656001389026642, 0.5121639370918274), (0.18930286169052124, 0.2637346684932709), (0.11687946319580078, 0.16306844353675842), (0.09796475619077682, 0.13558265566825867), (0.0848352462053299, 0.11619425565004349), (0.06783176958560944, 0.09277229756116867), (0.046059850603342056, 0.062238890677690506), (0.0, 0.0)] - -gemmL1bins= [(0.711203, 0.772211), (0.625894, 0.679601), (0.322665, 0.350383), (0.199646, 0.216727), (0.166556, 0.180781), (0.142945, 0.155132), (0.114662, 0.124399), (0.0771065, 0.0835984), (0.00034660729579627514, 0.008546584285795689)] -gemmL2bins= [(0.715208, 0.768102), (0.629411, 0.675947), (0.324433, 0.348358), (0.200659, 0.21539), (0.167381, 0.179634), (0.143637, 0.154119), (0.115197, 0.123548), (0.0774642, 0.0829647), (0.0003496285935398191, 0.009841435588896275)] - - - -def findBinByOp(op): - if op == 'tensorConv': - return convL1bins, convL2bins - if op == 'tensorAdd': - return biasL1bins, biasL2bins - if op == 'tensorGemm': - return gemmL1bins, gemmL2bins - - return None, None - - -def getSwing(Lx, opLxbin): - if opLxbin == None: - return 0 - for i, (minT, maxT) in enumerate(opLxbin): - if Lx > minT: - return i - - return 9 - - - -def getConfiguration(L_thresholds): - configuration = [] - for l in L_thresholds: - # L0 is op_type 
- opL1bin, opL2bin = findBinByOp(l[0]) - # NOTE: L2 is L1 error, L3 is L2 error - # only using L2 for image pipelines - sL2 = getSwing(l[3], opL2bin) - if sL2 < 7: - sL2 = sL2 + 1 - configuration.append((l[0], l[1], l[2], l[3], sL2, sL2, sL2)) - - return configuration - - -def displayConfig(config): - for c in config: - print(c) - -def displayMultipleConfigurations(configurations): - for f, c in configurations.items(): - print(f) - displayConfig(c) - print() - -def getConfigFromFile(filename): - L_requirements = readDataFromText(filename) - config = getConfiguration(L_requirements) - return config - - -def getConfigurationsFromDir(dirname): - configurations = dict() - for f in os.listdir(dirname): - configurations[f] = getConfigFromFile(os.path.join(dirname, f)) - - return configurations - - -def getLayerWiseTarget(config): - target = [] - for i, op in enumerate(config): - if (op[0] == 'tensorGemm') or (op[0] == 'tensorConv'): - t = op[6] - target.append(t) - - return target - - -def dumpLayerWiseTarget(file, targets): - with open(file, "w") as f: - for name, t in targets.items(): - f.write(name) - f.write(" ") - for i in t: - f.write(str(i)) - f.write(" ") - f.write("\n") - - -def getTargetsFromConfigurations(configs): - targets = dict() - for f, c in configs.items(): - targets[f] = [d[6] for d in c] - - return targets - - -def dumpBenchmarkTargets(name, benchmark_dir): - benchmark_targets = dict() - error = ['linear', 'log', 'quad'] - for e in error: - results_dir = os.path.join(benchmark_dir, e) - configs = getConfigurationsFromDir(results_dir) - benchmark_targets[e] = getTargetsFromConfigurations(configs) - - return benchmark_targets - - -def dumpTargets(filename, targets): - with open(filename, "w") as f: - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - for c in config: - f.write(str(c)) - f.write(" ") - f.write("\n") - - - -def getLayerSwings(layer_desc, configurations): - - layer_swings = [] - for i in 
range(len(configurations)): - config_vals = configurations[i] - layer_index = 0 - index = 0 - swing_vals = [] - - while layer_index < len(layer_desc): - if len(layer_desc[layer_index]) == 1: - promise_swing = config_vals[index] - layer_type = layer_desc[layer_index] - if layer_type != "conv" and layer_type != "dense": - promise_swing = -9 - index += 1 - else: - print (config_vals[index], config_vals[index+1]) - promise_swing = max(config_vals[index], config_vals[index+1]) - stride = len(layer_desc[layer_index]) - #print ("*stride = ", stride) - index += stride - - swing_vals.append(promise_swing) - layer_index += 1 - - layer_swings.append(swing_vals) - - return layer_swings - - - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -def dumpLayerTargets(targets, tuned_result_dir, layer_desc_file): - - layer_desc = loadLayerDesc(layer_desc_file) - print (layer_desc) - - file_names = [] - configurations = [] - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - config_vals = [] - for c in config: - config_vals.append(c) - print (config_vals) - - configurations.append(config_vals) - - rank = e + "_" + "_".join(name.split("_")[-2:]) - file_names.append(rank) - - - # NOTE: get PROMISE swing values corresponding to each layer - layer_swings = getLayerSwings(layer_desc, configurations) - - targets_file_path = tuned_result_dir + "/layer_targets.txt" - f = open(targets_file_path, "w+") - - for config in layer_swings: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - - f.write(swing_str) - index += 1 - - f.write("\n") - - f.close() 
- - print(layer_swings) - return layer_swings, file_names - - - -def replaceFirstLayer(layer_swings): - - # Ensuring first conv on GPU - for conf in layer_swings: - conf[0] = 9 - - - -def computeLayerTargets(tuned_result_dir, layer_desc_file): - - targets_file_path = tuned_result_dir + "/tensor_targets.txt" - targets = dumpBenchmarkTargets(targets_file_path, tuned_result_dir) - - dumpTargets(targets_file_path, targets) - - layer_swings, file_names = dumpLayerTargets(targets, tuned_result_dir, layer_desc_file) - - replaceFirstLayer(layer_swings) - - return layer_swings, file_names - - -# Externally-called function -def compute_swing_selection2(tuned_result_dir, layer_file): - - return computeLayerTargets(tuned_result_dir, layer_file) - - - - -if __name__ == "__main__": - - tuned_result_dir = "./vgg16_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition.txt" - - tuned_result_dir = "./resnet18_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition2.txt" - computeLayerTargets(tuned_result_dir, layer_file) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/utils.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/utils.py deleted file mode 100644 index 9ff3622d13c1c0c65a21938d487d968efae428f0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/utils.py +++ /dev/null @@ -1,296 +0,0 @@ - - -import os -import sys -import subprocess -from benchmarks import batch_id -from global_paths import tensorRT_dir - - - -def createDir(dir_path): - - try: - if not os.path.exists(dir_path): - os.mkdir(dir_path) - except: - print ("!ERROR: Could NOT create result directory = ", dir_path) - sys.exit(-1) - - -def createResultDirs(benchmarks): - - for bench_name in benchmarks: - Bench = benchmarks[bench_name] - - print ("Base Directory: ", Bench.base_dir , "BatchId = ", batch_id) - - createDir(tensorRT_dir + Bench.base_dir + "/loss_1") - createDir(tensorRT_dir + Bench.base_dir + 
"/loss_2") - createDir(tensorRT_dir + Bench.base_dir + "/loss_3") - createDir(tensorRT_dir + Bench.base_dir + "/loss_123/" + batch_id) - createDir(tensorRT_dir + Bench.base_dir + "/loss_1/" + batch_id) - createDir(tensorRT_dir + Bench.base_dir + "/loss_2/" + batch_id) - createDir(tensorRT_dir + Bench.base_dir + "/loss_3/" + batch_id) - createDir(tensorRT_dir + Bench.base_dir + "/loss_123/" + batch_id + "/devtuner/") - createDir(tensorRT_dir + Bench.base_dir + "/loss_1/" + batch_id + "/devtuner/" ) - createDir(tensorRT_dir + Bench.base_dir + "/loss_2/" + batch_id + "/devtuner/") - createDir(tensorRT_dir + Bench.base_dir + "/loss_3/" + batch_id + "/devtuner/") - - - - - -def genBaselineConfig(flags_file_path, default_flag, num_layers): - - f = open(flags_file_path, "w+") - for i in range(num_layers): - f.write(str(default_flag) + "\n") - - f.close() - - - -def readAccuracy(accuray_res_file): - - file = open(accuray_res_file, "r") - accuracy_str = file.read() - file.close() - accuracy = 0 # float(accuracy_str) - - try: - accuracy = float(accuracy_str) - except: - print("ERROR: Reading Accuray file - Aborting.... 
\n") - sys.exit(-1) - - - print ("*Configuration Accuracy = ", accuracy) - return accuracy - - - -#**** Exported Function ****/ -def getBaselineAccuracy(binary_path, num_layers): - - genBaselineConfig("promise_flags", 11, num_layers) - - run_cmd = "./" + binary_path - print (run_cmd) - - p = subprocess.Popen(run_cmd, shell=True) - p.wait() - - return readAccuracy("final_accuracy") - - - -def getLayerComposition(layer_composition_file): - - layer_desc = [] - f = open(layer_composition_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - -def debug_print(str): - - debug_flag = False - - if debug_flag == True: - print (str) - - - -def readOpKnobs(global_knobs_file, op_type, analysis_mode): - - knobs_file = open(global_knobs_file, "r") - - tuning_knobs = [] - for knob in knobs_file: - toks = knob.split("\t") - if op_type in toks[-1] and analysis_mode in toks[-2]: - knob_id = toks[0].split(",")[1] - tuning_knobs.append(knob_id) - - return tuning_knobs - - - -def readConvKnobs(global_knobs_file, analysis_mode): - - return readOpKnobs(global_knobs_file, "conv", analysis_mode) - - -def readFCKnobs(global_knobs_file, analysis_mode): - - return readOpKnobs(global_knobs_file, "fc", analysis_mode) - - -def readRedKnobs(global_knobs_file, analysis_mode): - - return readOpKnobs(global_knobs_file, "red", analysis_mode) - - - - -def createDevKnobs(layer_file, global_knobs_file, out_file): - - f = open(layer_file, "r") - - conv_knobs = readConvKnobs(global_knobs_file, "dev") - fc_knobs = readFCKnobs(global_knobs_file, "dev") - red_knobs = readRedKnobs(global_knobs_file, "dev") - - print (conv_knobs, fc_knobs, red_knobs) - - f_out = open(out_file, "w+") - - for x in f: - if "conv" in x: - f_out.write(",".join(conv_knobs) + "\n") - if "dense" in x: - f_out.write(",".join(fc_knobs) + "\n") - if "red" in x: - f_out.write(",".join(red_knobs) + "\n") - - f_out.close() - - - -def removePromiseKnobs(conv_knobs): - - promise_knobs = ["1", "2", "3", "4", 
"5", "6", "7"] - conv_knobs2 = [] - - for knob in conv_knobs: - if knob not in promise_knobs: - conv_knobs2.append(knob) - - return conv_knobs2 - - - -def createInstallAndDevKnobs(layer_file, global_knobs_file, out_file): - - f = open(layer_file, "r") - - conv_knobs_dev = readConvKnobs(global_knobs_file, "dev") - fc_knobs_dev = readFCKnobs(global_knobs_file, "dev") - red_knobs_dev = readRedKnobs(global_knobs_file, "dev") - - conv_knobs_install = readConvKnobs(global_knobs_file, "install") - fc_knobs_install = readFCKnobs(global_knobs_file, "install") - red_knobs_install = readRedKnobs(global_knobs_file, "install") - - - - #conv_knobs_dev.remove("11") # remove FP32 from install-time tuning - #fc_knobs_dev.remove("11") # remove FP32 from install-time tuning - #red_knobs_dev.remove("11") # remove FP32 from install-time tuning - - conv_knobs = conv_knobs_dev + conv_knobs_install - fc_knobs = fc_knobs_dev + fc_knobs_install - red_knobs = red_knobs_dev + red_knobs_install - - print (conv_knobs, fc_knobs, red_knobs) - - #sys.exit(0) - - f_out = open(out_file, "w+") - - ind = 0 - for x in f: - if "conv" in x: - layer_conv_knobs = conv_knobs - if ind == 0: - layer_conv_knobs = removePromiseKnobs(conv_knobs) - f_out.write(",".join(layer_conv_knobs) + "\n") - if "dense" in x: - f_out.write(",".join(fc_knobs) + "\n") - if "red" in x: - f_out.write(",".join(red_knobs) + "\n") - - ind += 1 - - f_out.close() - - - - - -def getInstallAndDevKnobs(layer_file, global_knobs_file): - - f = open(layer_file, "r") - - conv_knobs_dev = readConvKnobs(global_knobs_file, "dev") - fc_knobs_dev = readFCKnobs(global_knobs_file, "dev") - red_knobs_dev = readRedKnobs(global_knobs_file, "dev") - - conv_knobs_install = readConvKnobs(global_knobs_file, "install") - fc_knobs_install = readFCKnobs(global_knobs_file, "install") - red_knobs_install = readRedKnobs(global_knobs_file, "install") - - - conv_knobs = conv_knobs_dev + conv_knobs_install - fc_knobs = fc_knobs_dev + fc_knobs_install - red_knobs = 
red_knobs_dev + red_knobs_install - - print (conv_knobs, fc_knobs, red_knobs) - - - bench_knobs = [] - - ind = 0 - for x in f: - if "conv" in x: - layer_conv_knobs = conv_knobs - if ind == 0: - layer_conv_knobs = removePromiseKnobs(conv_knobs) - bench_knobs.append(layer_conv_knobs) - if "dense" in x: - bench_knobs.append(fc_knobs) - if "red" in x: - bench_knobs.append(red_knobs) - - ind += 1 - - - return bench_knobs - - - - - - -def dumpKnobsFile(knobs, out_file): - - - f_out = open(out_file, "w+") - - for layer_knobs in knobs: - f_out.write(",".join(layer_knobs) + "\n") - - f_out.close() - - - - - - - - -if __name__ == "__main__": - - - #createDevKnobs("../data/alexnet2/alexnet2_layers.txt", \ - # "../data/global_knobs.txt", "dev_knobs.txt") - - - knobs = getInstallAndDevKnobs("../data/alexnet2/alexnet2_layers.txt", \ - "../data/global_knobs.txt") - - print ("*** knobs = ", knobs) diff --git a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/validation.py b/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/validation.py deleted file mode 100644 index c334b2b319965ad8a866a2ec9d86137e8a5c6a28..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/autotuner/tuner_driver_src/validation.py +++ /dev/null @@ -1,802 +0,0 @@ - - -import os -import sys -import subprocess -import shutil -from compute_confs import computePSNRBenchSwings, computeBenchSwings -from buildRtConfig import loadConfigData - - - -def getLayerString(layer_swings): - - index = 0 - layer_string = "" - for swing in layer_swings: - layer_string += str(swing) - if index < len(layer_swings) - 1: - layer_string += "," - return layer_string - - - -def testValidationRun(Bench, validation_dir, layer_swings, threshold, rank_str): - - #### FIXME - #os.chdir("../build_promise/") - - validation_acc = Bench.validation_accuracy - target_acc = validation_acc - threshold - - validation_binary = Bench.validation_binary - - # Write to promise_flags - fout = open("promise_flags", "w+") - 
for swing in layer_swings: - int_swing = int(swing) - if int_swing > 0: - fout.write(str(swing) + "\n") - fout.close() - - # Execute Validation Run - p = subprocess.Popen("./" + validation_binary, shell=True) - p.wait() - - f = open("run_accuracies.txt") - index = 0.0 - unsuccessful = 0.0 - sum_acc = 0.0 - for x in f: - x = x.strip() - acc = float(x) - if acc < target_acc: - unsuccessful += 1 - index += 1 - sum_acc += acc - - f.close() - - confidence = ( (index - unsuccessful) / index) * 100.0 - print ("run_confidence = ", confidence) - avg_acc = sum_acc / index - - out_fname = validation_dir + validation_binary + "_" + str(avg_acc) - shutil.copy("run_accuracies.txt", out_fname + "_" + rank_str) - - layer_string = getLayerString(layer_swings) - f = open(out_fname, "w") - f.write("config:\t" + layer_string + "\n") - f.write("confidence:\t" + str(confidence) + "\n") - f.close() - - return confidence - - - - -def testPromiseRun(Bench, layer_swings, threshold): - - #### FIXME - #os.chdir("../build_promise/") - - validation_acc = Bench.validation_accuracy - target_acc = validation_acc - threshold - - validation_binary = Bench.validation_binary - - # Write to promise_flags - fout = open("promise_flags", "w+") - for swing in layer_swings: - int_swing = int(swing) - if int_swing > 0: - fout.write(str(swing) + "\n") - fout.close() - - # Execute Validation Run - p = subprocess.Popen("./" + validation_binary, shell=True) - p.wait() - - f = open("run_accuracies.txt") - index = 0.0 - unsuccessful = 0.0 - sum_acc = 0.0 - for x in f: - x = x.strip() - acc = float(x) - if acc < target_acc: - unsuccessful += 1 - index += 1 - sum_acc += acc - - f.close() - - confidence = ( (index - unsuccessful) / index) * 100.0 - print ("run_confidence = ", confidence) - avg_acc = sum_acc / index - - return confidence - - - - - - - - -def dumpConfigConfidence(configs, confidence_list, - result_dir, layer_desc_file): - - #### FIXME - #os.chdir("../build_tuner/") - - layer_desc = 
loadLayerDesc(layer_desc_file) - print (layer_desc) - - f = open(result_dir + "/conf_confidences.txt", "w+") - - count = 0 - for config in configs: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - f.write(swing_str) - - index += 1 - - f.write("\t" + str(confidence_list[count])) - f.write("\n") - count +=1 - - f.close() - - - - -def dumpValidatedConfigs(configs, result_dir, layer_desc_file, - output_file_name): - - os.chdir("../build_tuner/") - - layer_desc = loadLayerDesc(layer_desc_file) - print (layer_desc) - - f = open(result_dir + "/" + output_file_name, "w+") - - for config in configs: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - f.write(swing_str) - - index += 1 - f.write("\n") - f.close() - - - -def dumpRankings(validated_ranks, result_dir, rank_file): - - os.chdir("../build_tuner/") - f = open(result_dir + "/" + rank_file, "w+") - for rank in validated_ranks: - f.write(rank + "\n") - - f.close() - - - - -def replaceFP32Configs(loss_confs1, loss_confs2): - - for swing_conf in loss_confs1: - for i in range(0, len(swing_conf)): - if swing_conf[i] == 9: - swing_conf[i] = 8 - if i == len(swing_conf) - 1: - swing_conf[i] = 7 - - for swing_conf in loss_confs2: - for i in range(0, len(swing_conf)): - if swing_conf[i] == 9: - swing_conf[i] = 8 - if i == len(swing_conf) - 1: - swing_conf[i] = 7 - - - return loss_confs1, loss_confs2 - - - -def replaceGPUConfigs(Bench, 
loss_confs1, loss_confs2): - - skip_layer_str = Bench.skip_layer_str - layer_ids = skip_layer_str.split("_") - skip_layers = [] - for layer_id in layer_ids: - skip_layers.append(int(layer_id)) - - - for swing_conf in loss_confs1: - for i in range(0, len(swing_conf)): - if i in skip_layers and swing_conf[i] < 8: - swing_conf[i] = 8 - - for swing_conf in loss_confs2: - for i in range(0, len(swing_conf)): - if i in skip_layers and swing_conf[i] < 8: - swing_conf[i] = 8 - - - return loss_confs1, loss_confs2 - - - - -def runBenchValidation(Bench): - - #Bench = bench_tuner_data[bench_name] - - loss_confs, conf_ranks = computeBenchSwings(Bench) - loss1_confs = loss_confs[0] - loss2_confs = loss_confs[1] - conf_ranks1 = conf_ranks[0] - conf_ranks2 = conf_ranks[1] - - #loss1_confs, loss2_confs = replaceFP32Configs(loss1_confs, loss2_confs) - - - validation_dir_1 = "../build_tuner/" + Bench.result_dir_1 + "/validation_runs/" - if not os.path.exists(validation_dir_1): - os.mkdir(validation_dir_1) - - validation_dir_2 = "../build_tuner/" + Bench.result_dir_2 + "/validation_runs/" - if not os.path.exists(validation_dir_2): - os.mkdir(validation_dir_2) - - - ind = 0 - validated_confs1 = [] - validated_ranks1 = [] - failed_confs1 = [] - confidences1 = [] - for layer_swings in loss1_confs: - print ("len(layer_Swings) = ", len(layer_swings), "\n") - confidence = testValidationRun(Bench, validation_dir_1, - layer_swings, 1.0, conf_ranks1[ind]) - if confidence >= 95: - validated_confs1.append(layer_swings) - confidences1.append(confidence) - validated_ranks1.append(conf_ranks1[ind]) - else: - failed_confs1.append(layer_swings) - ind += 1 - - - ind = 0 - validated_confs2 = [] - validated_ranks2 = [] - failed_confs2 = [] - confidences2 = [] - for layer_swings in loss2_confs: - confidence = testValidationRun(Bench, validation_dir_2, layer_swings, 2.0, conf_ranks2[ind]) - if confidence >= 92: - validated_confs2.append(layer_swings) - confidences2.append(confidence) - 
validated_ranks2.append(conf_ranks2[ind]) - else: - failed_confs2.append(layer_swings) - ind += 1 - - dumpValidatedConfigs(validated_confs1, Bench.result_dir_1, - Bench.layer_file, "validated_confs.txt") - dumpValidatedConfigs(validated_confs2, Bench.result_dir_2, - Bench.layer_file, "validated_confs.txt") - - dumpValidatedConfigs(failed_confs1, Bench.result_dir_1, - Bench.layer_file, "failed_confs.txt") - dumpValidatedConfigs(failed_confs2, Bench.result_dir_2, - Bench.layer_file, "failed_confs.txt") - - dumpRankings(validated_ranks1, Bench.result_dir_1, "validated_ranks.txt") - dumpRankings(validated_ranks2, Bench.result_dir_2, "validated_ranks.txt") - - dumpConfigConfidence(validated_confs1, confidences1, - Bench.result_dir_1, Bench.layer_file) - - dumpConfigConfidence(validated_confs2, confidences2, - Bench.result_dir_2, Bench.layer_file) - - - print (validated_confs1) - print (validated_confs2) - - - -def readPromiseResults(loss1_file, loss2_file): - - loss_confs = [] - loss1_confs = [] - f1 = open(loss1_file) - for x in f1: - print (x) - swing_toks = x.split(",") - swing_list = [] - for swing_str in swing_toks: - swing_val = int(swing_str.split(" ")[0]) - swing_list.append(swing_val) - loss1_confs.append(swing_list) - - loss_confs.append(loss1_confs) - - loss2_confs = [] - f2 = open(loss1_file) - for x in f2: - swing_toks = x.split(",") - swing_list = [] - for swing_str in swing_toks: - swing_val = int(swing_str.split(" ")[0]) - swing_list.append(swing_val) - loss2_confs.append(swing_list) - - loss_confs.append(loss2_confs) - - return loss_confs - - - - - - -def readPromiseResults2(loss1_file, loss2_file, layer_file): - - layer_desc = loadLayerDesc(layer_file) - - loss_confs = [] - loss1_confs = [] - f1 = open(loss1_file) - for x in f1: - print (x) - swing_toks = x.split(",") - swing_list = [] - - it = 0 - for swing_str in swing_toks: - swing_val = int(swing_str.split(" ")[0]) - if "conv" in layer_desc[it] or "dense" in layer_desc[it]: - 
swing_list.append(swing_val) - - it += 1 - - loss1_confs.append(swing_list) - - loss_confs.append(loss1_confs) - - loss2_confs = [] - f2 = open(loss1_file) - for x in f2: - swing_toks = x.split(",") - swing_list = [] - - it = 0 - for swing_str in swing_toks: - swing_val = int(swing_str.split(" ")[0]) - if "conv" in layer_desc[it] or "dense" in layer_desc[it]: - swing_list.append(swing_val) - - it += 1 - - loss2_confs.append(swing_list) - - loss_confs.append(loss2_confs) - - return loss_confs - - - - - -def readPromiseResults3(result_dir): - - loss_confs = [] - # NOTE: Second parameter is ignored - config_arr = loadConfigData(result_dir, 100) - - for config in config_arr: - loss_confs.append(config.flags) - - return loss_confs - - - - - - - - -def runPromiseBenchValidation(Bench): - - - dir_prefix = "../build_tuner/" - #Bench = bench_tuner_data[bench_name] - #loss_confs = readPromiseResults(dir_prefix + Bench.loss1_result_file, dir_prefix + Bench.loss2_result_file) - loss_confs = readPromiseResults2(dir_prefix + Bench.loss1_result_file, \ - dir_prefix + Bench.loss2_result_file, Bench.layer_file) - - loss1_confs = loss_confs[0] - loss2_confs = loss_confs[1] - - ind = 0 - validated_confs1 = [] - failed_confs1 = [] - for layer_swings in loss1_confs: - confidence = testPromiseRun(Bench, layer_swings, 1.0) - if confidence >= 95: - validated_confs1.append(layer_swings) - else: - failed_confs1.append(layer_swings) - ind += 1 - - - ind = 0 - validated_confs2 = [] - failed_confs2 = [] - for layer_swings in loss2_confs: - confidence = testPromiseRun(Bench, layer_swings, 2.0) - if confidence >= 95: - validated_confs2.append(layer_swings) - else: - failed_confs2.append(layer_swings) - ind += 1 - - - dumpValidatedConfigs(validated_confs1, Bench.result_dir_1, - Bench.layer_file, "promise_validated_confs.txt") - dumpValidatedConfigs(validated_confs2, Bench.result_dir_2, - Bench.layer_file, "promise_validated_confs.txt") - - dumpValidatedConfigs(failed_confs1, Bench.result_dir_1, - 
Bench.layer_file, "promise_failed_confs.txt") - dumpValidatedConfigs(failed_confs2, Bench.result_dir_2, - Bench.layer_file, "promise_failed_confs.txt") - - - - - -def copyValidatedConf(result_dir, validated_confs): - - src_dir = result_dir + "/promise_tuner/high_confidence/" - dest_dir = result_dir + "/promise_tuner/validated/" - - if not os.path.isdir(dest_dir): - os.mkdir(dest_dir) - - for fname in validated_confs: - shutil.copy(src_dir + fname, dest_dir + fname) - - - -def copyFailedConf(result_dir, failed_confs): - - src_dir = result_dir + "/promise_tuner/high_confidence/" - dest_dir = result_dir + "/promise_tuner/failed/" - - if not os.path.isdir(dest_dir): - os.mkdir(dest_dir) - - for fname in failed_confs: - shutil.copy(src_dir + fname, dest_dir + fname) - - - - -def validateConfigs(Bench, result_dir, configs_arr, acc_thresh): - - validated_confs = [] - failed_confs = [] - for conf in configs_arr: - layer_swings = conf.flags - confidence = testPromiseRun(Bench, layer_swings, acc_thresh) - if confidence >= 95: - validated_confs.append(conf.fname) - else: - failed_confs.append(conf.fname) - - - copyValidatedConf(result_dir, validated_confs) - copyFailedConf(result_dir, failed_confs) - - - - - - - -def runPromiseBenchValidation2(Bench): - - - config_arr1 = loadConfigData(Bench.result_dir_1, 100) - config_arr2 = loadConfigData(Bench.result_dir_2, 100) - config_arr3 = loadConfigData(Bench.result_dir_3, 100) - - - validateConfigs(Bench, Bench.result_dir_1, config_arr1, 1.0) - validateConfigs(Bench, Bench.result_dir_2, config_arr2, 2.0) - validateConfigs(Bench, Bench.result_dir_3, config_arr3, 3.0) - - - - -### NOTE: Algo Tuner Validation routines - - - - - -def addAccuracyLoss(dest_file, accuracy_loss): - - f = open(dest_file, "r") - file_str = "" - ind = 0 - for x in f: - line_str = x - if ind == 0: - line_str = x.replace("\n", "") - line_str += "\tvalidation_loss=" + str(accuracy_loss) + "\n" - - file_str += line_str - ind += 1 - f.close() - - - f_out = 
open(dest_file, "w+") - f_out.write(file_str) - f_out.close() - - - - -def dumpValidConfigs(result_dir, validated_confs, src_dir): - - src_dir = result_dir + "/algo_tuner/" + src_dir + "/" # high_confidence/" - dest_dir = result_dir + "/algo_tuner/validated/" - - if not os.path.isdir(dest_dir): - os.mkdir(dest_dir) - - for (fname, accuracy_loss) in validated_confs: - dest_file = dest_dir + fname - shutil.copy(src_dir + fname, dest_file) - addAccuracyLoss(dest_file, accuracy_loss) - - - -def dumpFailedConfigs(result_dir, failed_confs, src_dir): - - src_dir = result_dir + "/algo_tuner/" + src_dir + "/" # high_confidence/" - dest_dir = result_dir + "/algo_tuner/failed/" - - if not os.path.isdir(dest_dir): - os.mkdir(dest_dir) - - for (fname, accuracy_loss) in failed_confs: - dest_file = dest_dir + fname - shutil.copy(src_dir + fname, dest_file) - addAccuracyLoss(dest_file, accuracy_loss) - - - - -def readAccuracy(file_name): - - file = open(file_name, "r") - file_str = file.read() - file.close() - - accuracy = 0.0 - try: - accuracy = float(file_str) - except: - print ("ERROR: Reading accuracy from 'final_accuracy' file") - sys.exit(0) - - print ("accuracy = ", accuracy) - return accuracy - -def getBaselineConfig(num_layers): - - fp32_swing = 11 - swings = [] - - for i in range(num_layers): - swings.append(str(fp32_swing)) - - return swings - - - -def readConfidence(target_acc): - - f = open("run_accuracies.txt") - index = 0.0 - unsuccessful = 0.0 - sum_acc = 0.0 - for x in f: - x = x.strip() - acc = float(x) - if acc < target_acc: - unsuccessful += 1 - index += 1 - sum_acc += acc - - f.close() - - confidence = ( (index - unsuccessful) / index) * 100.0 - print ("run_confidence = ", confidence) - avg_acc = sum_acc / index - - return confidence, avg_acc - - - -def invokeBinary(binary_path, layer_swings, runs, input_size, offset, target_acc): # threshold): - - default_skip = 4 - # Write to promise_flags - fout = open("promise_flags", "w+") - for swing in layer_swings: - 
int_swing = int(swing) - if int_swing > 0: - fout.write(str(swing) + "\n") - fout.close() - - run_cmd = "./" + binary_path + " " + str(runs) + " " + str(target_acc) + " " + str(default_skip) + " " + str(input_size) + " " + str(offset) - # Execute Validation Run - #p = subprocess.Popen("./" + validation_binary, shell=True) - - p = subprocess.Popen(run_cmd, shell=True) - p.wait() - - - - - -def validateAlgoConfigs(binary_path, result_dir, configs_arr, gold_acc, \ - acc_thresh, runs, src_dir = "high_confidence"): - - # NOTE: Use confidence target as 95% - confidence_target = 95 - # NOTE: 1 run sufficient for software approximations - - validated_confs = [] - failed_confs = [] - - #validation_acc = Bench.validation_accuracy - target_acc = gold_acc - acc_thresh - - for conf in configs_arr: - layer_swings = conf.flags - invokeBinary(binary_path, layer_swings, runs, 2000, 8000, target_acc) - confidence, avg_acc = readConfidence(target_acc) - - accuracy_loss = gold_acc - avg_acc - if confidence >= confidence_target: - validated_confs.append((conf.fname, accuracy_loss)) - else: - failed_confs.append((conf.fname, accuracy_loss)) - - - dumpValidConfigs(result_dir, validated_confs, src_dir) - dumpFailedConfigs(result_dir, failed_confs, src_dir) - - - - -def runAlgoBenchValidate(Bench): - - num_layers = Bench.num_layers - base_conf = getBaselineConfig(num_layers) - # Path to binary to run - binary_path = Bench.promise_binary - # NOTE: 'target_acc' passed 0.0 since unused for baseline run - invokeBinary(binary_path, base_conf, 1, 2000, 8000, 0.0) - gold_acc = readAccuracy("final_accuracy") - - - loss1_dir = Bench.result_dir_1 - loss2_dir = Bench.result_dir_2 - loss3_dir = Bench.result_dir_3 - - loss1_configs = loadConfigData(loss1_dir, 100) - loss2_configs = loadConfigData(loss2_dir, 100) - loss3_configs = loadConfigData(loss3_dir, 100) - - runs = 1 - validateAlgoConfigs(binary_path, loss1_dir, loss1_configs, gold_acc, 1.0, runs) - validateAlgoConfigs(binary_path, loss2_dir, 
loss2_configs, gold_acc, 2.0, runs) - validateAlgoConfigs(binary_path, loss3_dir, loss3_configs, gold_acc, 3.0, runs) - - - - - - -def getStatisticalConfidence(binary_path, layer_swings, \ - gold_accuracy, accuracy_slack, \ - total_runs, abort_after): - - - target_acc = gold_accuracy - accuracy_slack - - # Write to promise_flags - fout = open("promise_flags", "w+") - for swing in layer_swings: - int_swing = int(swing) - if int_swing > 0: - fout.write(str(swing) + "\n") - fout.close() - - extra_args = str(total_runs) + " " + str(target_acc) + " " + str(abort_after) - # Execute Validation Run - p = subprocess.Popen("./" + binary_path + " " + extra_args, shell=True) - p.wait() - - f = open("run_accuracies.txt") - index = 0.0 - unsuccessful = 0.0 - sum_acc = 0.0 - for x in f: - x = x.strip() - acc = float(x) - if acc < target_acc: - unsuccessful += 1 - index += 1 - sum_acc += acc - - f.close() - - confidence = ( (index - unsuccessful) / index) * 100.0 - avg_acc = sum_acc / index - print ("run_confidence = ", confidence, " avg_acc = ", avg_acc) - - return avg_acc, confidence - - - - - - - - -if __name__ == "__main__" : - - - getStatisticalConfidence("lenet_promise", [7, 1, 1, 1], 99.0, 2, 50, 5) - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py b/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py deleted file mode 100644 index 8ac059ba0d0ac16dc354a367810dce5a31a15fc0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py +++ /dev/null @@ -1,136 +0,0 @@ - - -import os -import sys -from tuner_src import benchmarks -import subprocess - - -def readAccuracy(): - - f = open("final_accuracy") - acc_str = f.read() - return acc_str - - -def executeAndDumpOutput(binary_path): - - run_cmd = "./" + binary_path - output_file_path = "./test_dumps/" + binary_path - output_file = open(output_file_path, "a+") - - p = subprocess.Popen(run_cmd, shell=True, stdout=output_file) - retcode = p.wait() - - output_file.close() - - 
accuracy = readAccuracy() - print ("accuracy = ", accuracy) - - return retcode - - - - -def runTensorBinaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.tuner_binary) - - retcode = executeAndDumpOutput(bench.tuner_binary) - - if retcode != 0: - failed_progs.append(bench.tuner_binary) - - return failed_progs - - - -def runLayerBinaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.promise_binary) - - retcode = executeAndDumpOutput(bench.promise_binary) - - if retcode != 0: - failed_progs.append(bench.promise_binary) - - return failed_progs - - - -def runFp16Binaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.fp16_binary) - - retcode = executeAndDumpOutput(bench.fp16_binary) - - if retcode != 0: - failed_progs.append(bench.tuner_binary) - - return failed_progs - - - - -def runTests(test_benchmarks): - - if not os.path.exists("test_dumps"): - os.mkdir("test_dumps") - - tensor_failed_progs = runTensorBinaries(test_benchmarks) - layer_failed_progs = runLayerBinaries(test_benchmarks) - fp16_failed_progs = runFp16Binaries(test_benchmarks) - - failed_progs = tensor_failed_progs + layer_failed_progs + fp16_failed_progs - - total_tests = len(test_benchmarks) * 3 - succesful_tests = total_tests - len(failed_progs) - - - print ("\n\n\n **** Results Summary ***** \n\n\n") - - print ("Total_Tests = ", total_tests, "\n") - print ("Successful_Tests = ", succesful_tests, "\n") - print ("Failed_Tests = ", total_tests - succesful_tests, "\n") - - print ("\n\n --- Failing Tests = ", tensor_failed_progs + 
layer_failed_progs) - - print ("\n *Per-process logs dumped to ./test_dumps/") - - - -def checkEnvironment(): - - if not "CUDA_INCLUDE_PATH" in os.environ: - print ("ERROR: CUDA_INCLUDE_PATH NOT SET!") - sys.exit(0) - - if not "CUDNN_PATH" in os.environ: - print ("ERROR: CUDA_PATH NOT SET!") - sys.exit(0) - - - if not os.path.exists("promise_flags"): - print ("promise_flags NOT found -- CREATE promise_flags with flag assignment per-layer") - sys.exit(0) - - -if __name__ == "__main__": - - checkEnvironment() - - runTests(benchmarks.bench_tuner_data) - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py b/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py deleted file mode 100644 index 6e59b72f023a7869e721ba62f923f5e4ca791113..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py +++ /dev/null @@ -1,116 +0,0 @@ - - -class TuningParameters: - def __init__(self): - self.iterations_measured = 150 - self.total_iterations = 30000 - - -tunerParams = TuningParameters() - - -class Benchmark: - def __init__(self): - self.binary_time = 0 - - -### All times are real profiled times on the Jetson Board -### Times are for 150 OpenTuner iterations on Jetson - -ResNet50 = Benchmark() -ResNet50.tuner_time = 3.85 * 100 * 150 # 50 images * 100 batches - -VGG16_ImageNet = Benchmark() -VGG16_ImageNet.tuner_time = 4.55 * 100 * 150 # 50 images * 100 batches - -AlexNet_ImageNet = Benchmark() -AlexNet_ImageNet.tuner_time = 0.7 * 100 * 150 - - -VGG16_CIFAR10 = Benchmark() -VGG16_CIFAR10.tuner_time = 1.54 * 60 * 60 # 50 images * 100 batches - - -VGG16_CIFAR100 = Benchmark() -VGG16_CIFAR100.tuner_time = 1.57 * 60 * 60 # 50 images * 100 batches - - -ResNet18 = Benchmark() -ResNet18.tuner_time = 0.52 * 60 * 60 # 12.9 measured for 1000 images - - -MobileNet = Benchmark() -MobileNet.tuner_time = 0.72 * 60 * 60 # 50 images * 100 batches - - -AlexNet_CIFAR10 = Benchmark() -AlexNet_CIFAR10.tuner_time = 0.67 * 60 * 60 # Time 
in hours - - -AlexNet2_CIFAR10 = Benchmark() -AlexNet2_CIFAR10.tuner_time = 0.19 * 60 * 60 - - -LeNet_CIFAR10 = Benchmark() -LeNet_CIFAR10.tuner_time = 0.11 * 60 * 60 - - - - - -def getInstallTime(Bench): - - ## We limit pareto configs to 50 after iterations of tuning complete - - tuner_invocations = tunerParams.total_iterations / tunerParams.iterations_measured - - extrapolated_time = tuner_invocations * Bench.tuner_time - - time_hours = extrapolated_time / (60 * 60) - - return time_hours - - - -# Routine to compute extrapolated tuning times -def computeExtrapolatedInstallTime(): - - - resnet50_time = getInstallTime(ResNet50) - print ("*** ResNet50 time (hrs) = ", resnet50_time) - - resnet18_time = getInstallTime(ResNet18) - print ("*** ResNet18 time (hrs) = ", resnet18_time) - - mobilenet_time = getInstallTime(MobileNet) - print ("*** MobileNet time (hrs) = ", mobilenet_time) - - vgg16_img_time = getInstallTime(VGG16_ImageNet) - print ("*** VGG16-Imagenet time (hrs) = ", vgg16_img_time) - - vgg16_cifar10_time = getInstallTime(VGG16_CIFAR10) - print ("*** VGG16-CIFAR10 time (hrs) = ", vgg16_cifar10_time) - - vgg16_cifar100_time = getInstallTime(VGG16_CIFAR100) - print ("*** VGG16-CIFAR100 time (hrs) = ", vgg16_cifar100_time) - - alexnet_img_time = getInstallTime(AlexNet_ImageNet) - print ("*** AlexNet-Imagenet time (hrs) = ", alexnet_img_time) - - alexnet_cifar10_time = getInstallTime(AlexNet_CIFAR10) - print ("*** AlexNet-CIFAR10 time (hrs) = ", alexnet_cifar10_time) - - alexnet2_cifar10_time = getInstallTime(AlexNet2_CIFAR10) - print ("*** AlexNet2-CIFAR10 time (hrs) = ", alexnet2_cifar10_time) - - lenet_cifar10_time = getInstallTime(LeNet_CIFAR10) - print ("*** LeNet-CIFAR10 time (hrs) = ", lenet_cifar10_time) - - - - - -if __name__ == "__main__": - - computeExtrapolatedInstallTime() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py b/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py deleted file mode 100644 index 
9f2ffb3eacd3cb81bcefb4b44a48f1d0a8a8356d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py +++ /dev/null @@ -1,139 +0,0 @@ - - -import subprocess -import os -import operator - - -def constructTunerFile(num_flags, tensor_id, error_level, default_error): - - f = open("opentuner_flags", "w+") - - for i in range(num_flags): - if i == tensor_id: - f.write(str(error_level) + "\n") - else: - f.write(str(default_error) + "\n") - - f.close() - - - -def runAndTestError(binary_name, gold_acc): - - num_runs = 20 - - binary_name = "./" + binary_name - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([binary_name, str(num_runs)], stdout = FNULL) - p.wait() - - f = open("run_accuracies.txt") - - total_err = 0.0 - for x in f: - acc = float(x.strip()) - total_err += (gold_acc - acc) - - avg_err = total_err / num_runs - - return avg_err - - - - -def test_sensitivity(Bench): - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - num_flags = Bench.num_flags - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 0) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_errors_1000.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_errors_ranked_1000.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() - - - -def 
test_sensitivity2(Bench): - - num_flags = Bench.num_flags - - constructTunerFile(num_flags, 0, 3, 3) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - - ref_acc = Bench.tuner_accuracy - error - print ("*** Gold accuracy = ", Bench.tuner_accuracy, " Ref accuracy = ", ref_acc, " *** \n\n") - - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 3) - error = runAndTestError(Bench.tuner_binary, ref_acc) - print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_composite_errors.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_composite_errors_ranked.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py b/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py deleted file mode 100644 index bae38bf7e497897ae3db4e12dce48914903739fb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py +++ /dev/null @@ -1,140 +0,0 @@ - -import os -import sys -import shutil -import subprocess -import shutil - - - -class Benchmark: - def __init__(self): - self.binary = "" - self.num_flags = 4 - - - -Alexnet1 = Benchmark() -Alexnet1.binary = "./lenet_keras_promise" -Alexnet1.accuracy = 98.8 -Alexnet1.flags = [[8], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4] ] - - -Alexnet2 = Benchmark() 
-Alexnet2.binary = "./fc4_clipped_promise" -Alexnet2.accuracy = 93.72 -Alexnet2.flags = [[3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7] ] - - - -def dumpConfig(conf_flags, dir_prefix, file_id): - - shutil.copy("promise_flags", dir_prefix + "/" + str(file_id) + ".txt") - - -def dumpFinalConfigs(final_confs, dir_prefix): - - f = open(dir_prefix + "/final_confs.txt", "w+") - for conf in final_confs: - ind = 0 - for flag in conf: - f.write(str(flag)) - if ind < len(conf) - 1: - f.write(",") - - ind += 1 - f.write("\n") - - f.close() - - -def getAccuracy(): - - file = open("final_accuracy", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - - -def testConfidence(binary, target_acc, total_runs): - - for i in range(total_runs): - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - acc = getAccuracy() - if acc < target_acc: - return False - - return True - - -def singleRun(binary): - - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - - return getAccuracy() - - - -def createPromiseFile(conf_flags): - - f = open("promise_flags", "w+") - for flag in conf_flags: - f.write(str(flag) + "\n") - f.close() - - - -def runExhaustive(Bench, threshold, dir_prefix): - - flags = Bench.flags - - accepted_confs = [] - ind = 0 - for flag1 in flags[0]: - for flag2 in flags[1]: - for flag3 in flags[2]: - for flag4 in flags[3]: - print (flag1, flag2, flag3, flag4) - conf_flags = [] - conf_flags.append(flag1) - conf_flags.append(flag2) - conf_flags.append(flag3) - conf_flags.append(flag4) - - createPromiseFile(conf_flags) - - accuracy = singleRun(Bench.binary) - target_acc = Bench.accuracy - threshold - - if accuracy > target_acc: - if testConfidence(Bench.binary, target_acc, 3): - dumpConfig(conf_flags, dir_prefix, ind) - accepted_confs.append(conf_flags) - - ind += 1 - - dumpFinalConfigs(accepted_confs, dir_prefix) - - - 
-if __name__ == "__main__": - - #runExhaustive(Alexnet1, 1.0, "lenet_1") - #runExhaustive(Alexnet1, 2.0, "lenet_2") - - runExhaustive(Alexnet2, 1.0, "fc4_1") - runExhaustive(Alexnet2, 2.0, "fc4_2") - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py b/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py deleted file mode 100644 index 0b7f09d92e91894d284b40cc0bd2d346c08e36c7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py +++ /dev/null @@ -1,42 +0,0 @@ - - -import sys - - -if __name__ == "__main__": - - f = open(sys.argv[1], "r") - f2 = open("quant_ranges.txt", "w+") - - layer_line = False - for x in f: - if "ConvLayer_PROMISE" in x or "FCLayer_PROMISE" in x or layer_line == True: - if layer_line == True: - layer_line = False - else: - layer_line = True - - print x - toks = x.split(",") - - for tok in toks: - tok = tok.strip() - tok_val = "" - try: - tok_val = float(tok) - try: - tok_val = int(tok) - except: - print (tok_val) - f2.write(str(tok_val) + " ") - #f2.write("tok_val = ", tok_val + " ") - except: - continue - - f2.write("\n") - - - f.close() - f2.close() - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py b/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py deleted file mode 100644 index e81cf10ece72c43457de718365bd2017e1684ab2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py +++ /dev/null @@ -1,79 +0,0 @@ - -import sys -import numpy as np -import subprocess - - -def get_avg_power(f_name): - - f = open(f_name, "r") - - gpu_power = [] - ddr_power = [] - sys_power = [] - - for x in f: - toks = x.split() - - gpu_power.append(float(toks[1])) - ddr_power.append(float(toks[2])) - sys_power.append(float(toks[3])) - - - avg_gpu_power = np.mean(gpu_power) - avg_ddr_power = np.mean(ddr_power) - avg_sys_power = np.mean(sys_power) - - print ("** avg_gpu_power = ", avg_gpu_power, " avg_ddr_power = ", \ - avg_ddr_power, " 
avg_sys_power = ", avg_sys_power) - - return (avg_gpu_power, avg_ddr_power, avg_sys_power) - - -#avail_frequencies = [140250000, 229500000, 318750000, 408000000, 497250000, -# 586500000, 675750000, 765000000, 854250000, -# 943500000, 1032750000, 1122000000, 1211250000, 1300500000]; - - -avail_frequencies = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; - - -if __name__ == "__main__": - - - programs = ["alexnet_promise", "alexnet2_promise", "vgg16_cifar10_promise", "resnet18_promise", "resnet50_imagenet_promise", "mobilenet_promise", "vgg16_imagenet_promise"] - - for binary_path in programs: - - power_avgs = [] - power_freq_file = "power_data/" + binary_path + "/power_vals.txt" - fout = open(power_freq_file, "w+") - - for frequency in avail_frequencies: - - print (frequency) - poll_path = "./poll" - iterations = 10 - - poll_cmd = poll_path + " " + str(frequency) # sudo needed for frequency change - subprocess.call(poll_cmd, shell=True) - - - binary_path = "./" + binary_path - power_file = " power_data/" + binary_path + "/power.out." + str(frequency) - profile_cmd = "../../system_profiler/build/offline_profiler " + binary_path + " " + \ - str(iterations) + " tensor.out." + str(frequency) + power_file - - subprocess.call(profile_cmd, shell=True) - - - #avg_power = get_avg_power("power.out." 
+ str(frequency)) - avg_power = get_avg_power(power_file) - power_avgs.append(avg_power) - - fout.write(str(avg_power[0]) + " " + str(avg_power[1]) + " " + str(avg_power[2]) + "\n") - print (avg_power) - - - print (power_avgs) - fout.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh b/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh deleted file mode 100644 index 33a54cd0de626113e5cf11e2f6a6928d4fa384eb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -export HPVM_TENSOR_RT_HOME=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/ -export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH - -clang++ -I/software/cuda-9.1/include -emit-llvm -c ${HPVM_TENSOR_RT_HOME}/tensor_runtime/include/tensor_signatures.cc -o ${HPVM_TENSOR_RT_HOME}/lib/tensor_runtime.bc -llvm-dis --version -llvm-dis ${HPVM_TENSOR_RT_HOME}/lib/tensor_runtime.bc - - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py b/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py deleted file mode 100644 index c64a9f242fcf80b585c5862ceef16b8fb8ce50a5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py +++ /dev/null @@ -1,48 +0,0 @@ - -import sys - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -if __name__ == "__main__": - - if len(sys.argv) < 4: - print ("Usage: python mark_depthwise.py $layer_file $input_conf $output_conf") - - layer_file_name = sys.argv[1] - input_file_name = sys.argv[2] - output_file_name = sys.argv[3] - - - layer_desc = loadLayerDesc(layer_file_name) - - f_in = open(input_file_name) - f_out = open(output_file_name, "w+") - - for x in f_in: - it = 0 - confs = x.split(",") - print confs - for conf in confs: - print (" it = ", it, " layer_desc[it] = ", layer_desc[it], " \n") - if 
layer_desc[it][0] == "depthwise_conv": - f_out.write("9,") - else: - f_out.write(conf) - if it < len(confs) - 1: - f_out.write(",") - - it += 1 - - f_in.close() - f_out.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py b/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py deleted file mode 100644 index 4ca1f3f52e59498725414f37e56e06e5e74f1953..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py +++ /dev/null @@ -1,316 +0,0 @@ - -import os -import sys -import shutil -import subprocess -import shutil -import numpy as np - - - -class Benchmark: - def __init__(self): - self.binary = "" - - - -benchmarks = {} - -Alexnet1 = Benchmark() -Alexnet1.binary = "./lenet_keras_promise" -Alexnet1.accuracy = 98.7 -Alexnet1.loss1_conf = "8 8 8 8,4,4,7" -Alexnet1.loss2_conf = "8 8 8 8,3,4,7" - -benchmarks["lenet"] = Alexnet1 - - -Alexnet2 = Benchmark() -Alexnet2.binary = "./fc4_clipped_promise" -Alexnet2.accuracy = 93.72 -Alexnet2.loss1_conf = "7,7,6,7" -Alexnet2.loss2_conf = "4,4,4,5" - -benchmarks["fc4"] = Alexnet2 - - -Alexnet3 = Benchmark() -Alexnet3.binary = "./alexnet_valid" -Alexnet3.accuracy = 79.16 -Alexnet3.loss1_conf = "8 8 8 8,6,6,6,7,7" -Alexnet3.loss2_conf = "8 8 8 8,4,4,6,4,7" - -benchmarks["alexnet"] = Alexnet3 - - -Alexnet4 = Benchmark() -Alexnet4.binary = "./alexnet2_valid" -Alexnet4.accuracy = 85.09 -Alexnet4.loss1_conf = "9 9 9,7,7,7,9 9 9,7,9 9" -Alexnet4.loss2_conf = "9 9 9,7,7,6,8 8 8,6,9 9" - -benchmarks["alexnet2"] = Alexnet4 - - -Alexnet5 = Benchmark() -Alexnet5.binary = "./resnet18_valid" -Alexnet5.accuracy = 89.44 -Alexnet5.loss1_conf = "9 9 9,8 8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,7,8,8,8 8 8,8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,8 8,8,8,8 8 8,8 8,8 8,8,8,8 8 8,8 8,8,8,8 8 8,8 8,8,8,8,8 8" -Alexnet5.loss2_conf = "9 9 9,8 8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,7,8,8,8 8 8,8 8,8 8,8,8,8 8 8,7,8,8,7,8 8,8,8,8 8 8,8 8,8 8,8,8,8 8 8,8 8,8,8,8 8 8,7,8,8,8,8 8" - -benchmarks["resnet"] = Alexnet5 - 
- - -Alexnet6 = Benchmark() -Alexnet6.binary = "./vgg16_cifar10_valid" -Alexnet6.accuracy = 89.41 -Alexnet6.loss1_conf = "9 9 9,7,7,7,9 9 9,8 8 8,7,8 8 8,7,7,8 8 8,8 8 8,7,9 9 9,9 9" -Alexnet6.loss2_conf = "9 9 9,5,5,8 8 8 8,4,6,4,7,8 8 8,4,4,4,7,8 8 8,8 8" - -benchmarks["vgg16_cifar10"] = Alexnet6 - - -Alexnet7 = Benchmark() -Alexnet7.binary = "./vgg16_cifar100_valid" -Alexnet7.accuracy = 66.19 -Alexnet7.loss1_conf = "9 9 9,8 8 8 8,8 8 8,8 8 8 8,8 8 8,7,7,7,8 8 8,8 8 8 8,7,7,8 8 8 8,8 8 8,8 8" -Alexnet7.loss2_conf = "9 9 9,8 8 8 8,8 8 8,7,8 8 8,8 8 8,8 8 8 8,6,6,7,8 8 8,7,6,8 8 8,8 8" - -benchmarks["vgg16_cifar100"] = Alexnet7 - - - -Alexnet8 = Benchmark() -Alexnet8.binary = "./pipeline_GEOM_valid" -Alexnet8.loss1_conf = "8 8,8 8 8,8 8,7" -Alexnet8.loss2_conf = "8 8,8 8 8,8 8,6" - -benchmarks["pipeline_GEOM"] = Alexnet8 - - - -Alexnet9 = Benchmark() -Alexnet9.binary = "./pipeline_GEMO_valid" -Alexnet9.loss1_conf = "8 8,8 8 8,8 8,8 8" -Alexnet9.loss2_conf = "7,8 8 8,8 8,8 8" - -benchmarks["pipeline_GEMO"] = Alexnet9 - - - -Alexnet10 = Benchmark() -Alexnet10.binary = "./pipeline_GEO_valid" -Alexnet10.loss1_conf = "8 8,8 8 8,8 8" -Alexnet10.loss2_conf = "8 8,8 8 8,8 8" - -benchmarks["pipeline_GEO"] = Alexnet10 - - - -Alexnet11 = Benchmark() -Alexnet11.binary = "./pipeline_GSM_valid" -Alexnet11.loss1_conf = "8 8,8 8,7" -Alexnet11.loss2_conf = "7,8 8,6" - -benchmarks["pipeline_GSM"] = Alexnet11 - - - -Alexnet12 = Benchmark() -Alexnet12.binary = "./pipeline_GSME_valid" -Alexnet12.loss1_conf = "8 8,8 8,8 8,8 8 8" -Alexnet12.loss2_conf = "7,8 8,8 8,8 8 8" - -benchmarks["pipeline_GSME"] = Alexnet12 - - - -def createPromiseFile(conf_flag_str): - - conf_flags = conf_flag_str.split(",") - f = open("promise_flags", "w+") - for flag_str in conf_flags: - flags = flag_str.split() - f.write(str(flags[0]) + "\n") - f.close() - - -def getRunAccuracies(): - - run_accuracies = [] - file = open("run_accuracies.txt", "r") - file_str = file.read() - - for flag in file_str.split("\n"): - 
print ("*** flag = ", flag) - flag = flag.strip() - if flag == "": - continue - run_accuracies.append(float(flag)) - - file.close() - - return run_accuracies - - - -def testConfidence(binary): - - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - run_accuracies = getRunAccuracies() - - return np.mean(run_accuracies), np.std(run_accuracies) - - - -def getAccuracy(): - - file = open("final_accuracy", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - #print accuracy - return accuracy - - -def getPSNR(): - - file = open("avg_psnr", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return -100 - - #print accuracy - return accuracy - - - - -def testPSNRConfidence(binary, total_runs): - - run_accuracies = [] - run_psnr = [] - for i in range(total_runs): - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - acc = getAccuracy() - psnr = getPSNR() - run_accuracies.append(acc) - run_psnr.append(psnr) - - return np.mean(run_accuracies), np.std(run_accuracies), np.mean(run_psnr), np.std(run_psnr) - - - -def runBench(bench_name, dir_prefix): - - Bench = benchmarks[bench_name] - binary = Bench.binary - accuracy = Bench.accuracy - - createPromiseFile(Bench.loss1_conf) - mean, std = testConfidence(binary) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss1.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.close() - - createPromiseFile(Bench.loss2_conf) - mean, std = testConfidence(binary) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss2.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.close() - - - - - -def gen30dbFile(): - - f = open("psnr.txt", "w+"); - f.write("30"); - f.close() - - -def gen20dbFile(): - - f = open("psnr.txt", "w+"); - f.write("20"); - f.close() - - - -def 
runPSNRBench(bench_name, dir_prefix): - - Bench = benchmarks[bench_name] - binary = Bench.binary - - gen30dbFile() - createPromiseFile(Bench.loss1_conf) - mean, std, psnr_mean, psnr_std = testPSNRConfidence(binary, 20) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss30.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.write(" psnr_mean = " + str(psnr_mean) + " psnr_std = " + str(psnr_std)) - f.close() - - - gen20dbFile() - createPromiseFile(Bench.loss2_conf) - mean, std, psnr_mean, psnr_std = testPSNRConfidence(binary, 20) - print ("mean = ", mean, " std = ", std) - - f = open(dir_prefix + "/" + binary + "_loss20.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.write(" psnr_mean = " + str(psnr_mean) + " psnr_std = " + str(psnr_std)) - f.close() - - - - - - -def runDNNs(): - - #runBench("fc4", "avg_accuracies") - #runBench("lenet", "avg_accuracies") - #runBench("alexnet", "avg_accuracies") - #runBench("alexnet2", "avg_accuracies") - #runBench("resnet", "avg_accuracies") - #runBench("vgg16_cifar10", "avg_accuracies") - #runBench("vgg16_cifar100", "avg_accuracies") - - runPSNRBench("pipeline_GEOM", "avg_accuracies") - runPSNRBench("pipeline_GEMO", "avg_accuracies") - runPSNRBench("pipeline_GEO", "avg_accuracies") - runPSNRBench("pipeline_GSM", "avg_accuracies") - runPSNRBench("pipeline_GSME", "avg_accuracies") - - - - -if __name__ == "__main__": - - runDNNs() - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py b/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py deleted file mode 100644 index 74aa23c71aa3e81fc9422a3cc73ba3b69ed98c8a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py +++ /dev/null @@ -1,125 +0,0 @@ - -import argparse -import os -import subprocess -import sys - - -def getAccuracy(file_name): - - if not os.path.exists(file_name): - print("final_accuracy file not found ") - sys.exit(0) - - file 
= open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -total_runs = 12.0 -skip_lines = 0 - - -def test_func(): - print "test_func" - sys.exit(0) - - -def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): - - #total_runs = 100.0 - successful_runs = 0.0 - total_acc = 0 - - for i in range(int(total_runs)): - subprocess.call(binary_name) - accuracy = getAccuracy("final_accuracy") - total_acc += accuracy - - if accuracy > accuracy_threshold: - successful_runs += 1 - - confidence = (successful_runs / total_runs) * 100.0 - print("confidence = ", confidence) - avg_acc = total_acc / total_runs - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - -def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - index = 0 - results_str = "" - for x in f: - if index >= skip_lines: - error_knob = int(float(x.split()[1])) - print error_knob - tuner_file.write(str(error_knob) + "\n") - - results_str += x - index += 1 - - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary, accuracy, confidence) - - if run_confidence > 90: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(results_str) - f2.close() - - conf_result = (run_confidence, avg_accuracy, file_name) - confidence_list.append(conf_result) - - return confidence_list - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to 
get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - argparser.add_argument('--output-dir', help='Directory for storing output directory') - argparser.add_argument('--binary', help='Binary name to run') - argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') - argparser.add_argument('--confidence', type=float, help='Confidence threshold') - - - args = argparser.parse_args() - result_dir = args.result_dir - output_dir = args.output_dir - binary = args.binary - accuracy = args.accuracy - confidence = args.confidence - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - #print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py b/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py deleted file mode 100644 index 3c9ea9de2854ed133350950d3995f459120176de..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py +++ /dev/null @@ -1,60 +0,0 @@ - - - -if __name__ == "__main__": - - dnn_benchs = [] - dnn_benchs.append("fc4") - dnn_benchs.append("lenet_keras") - dnn_benchs.append("alexnet_cifar10") - dnn_benchs.append("alexnet2_cifar10") - dnn_benchs.append("vgg16_cifar10") - dnn_benchs.append("vgg16_cifar100") - dnn_benchs.append("resnet18_cifar10") - dnn_benchs.append("mobilenet") - dnn_benchs.append("mobilenet_shallow") - - - for bench in dnn_benchs: - errors_file1 = "build_tuner/tuner_results/" + bench + "/tensor_errors_1000.txt" - errors_file2 = "build_test/tuner_results/" + bench + "/tensor_composite_errors.txt" - ops_file = "build_tuner/tuner_results/" + bench + 
"/op_names.txt" - - f1 = open(errors_file1) - f2 = open(errors_file2) - f3 = open(ops_file) - - fout = open("build_tuner/tuner_results/" + bench + "/tensor_op_errors.txt", "w+") - - bench_data = [] - for x in f3: - op_name = x.strip() - bench_data.append([op_name, 0.0, 0.0]) - - it = 0 - for x in f1: - if it >= len(bench_data): - break - toks = x.split() - error1 = float(toks[1]) - print error1 - bench_data[it][1] = error1 - it += 1 - - it = 0 - for x in f2: - if it >= len(bench_data): - break - toks = x.split() - error2 = float(toks[1]) - bench_data[it][2] = error2 - it += 1 - - for i in range(len(bench_data)): - fout.write(str(i) + "\t" + bench_data[i][0] + "\t" + str(bench_data[i][1]) + "\t" + str(bench_data[i][2]) + "\n") - - fout.close() - f1.close() - f2.close() - f3.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py b/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py deleted file mode 100644 index c54d7dfcddc161aa20dd8378d2652d32c4905e38..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py +++ /dev/null @@ -1,43 +0,0 @@ - - -import numpy as np -import os -import struct - - -def read_value_range(file_name): - - print file_name - f = open(file_name, "rb") - - bytes = os.stat(file_name).st_size - elems = bytes/4 - - data_arr = struct.unpack('f'*elems, f.read(4*elems)) - - print (np.amin(data_arr)) - print (np.amax(data_arr)) - - - - -if __name__ == "__main__": - - dir_prefix = "model_params/alexnet2_cifar10/" - print dir_prefix - read_value_range(dir_prefix + "norm_cifar_input.bin") - read_value_range(dir_prefix + "conv1.bin") - read_value_range(dir_prefix + "conv1_bias.bin") - read_value_range(dir_prefix + "conv2.bin") - read_value_range(dir_prefix + "conv2_bias.bin") - read_value_range(dir_prefix + "conv3.bin") - read_value_range(dir_prefix + "conv3_bias.bin") - read_value_range(dir_prefix + "conv4.bin") - read_value_range(dir_prefix + "conv4_bias.bin") - 
read_value_range(dir_prefix + "conv5.bin") - read_value_range(dir_prefix + "conv5_bias.bin") - read_value_range(dir_prefix + "conv6.bin") - read_value_range(dir_prefix + "conv6_bias.bin") - read_value_range(dir_prefix + "fc1.bin") - read_value_range(dir_prefix + "fc1_bias.bin") - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py b/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py deleted file mode 100644 index b75a7d4750074cf6234151ae21a8bff5af1050d5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py +++ /dev/null @@ -1,35 +0,0 @@ - - -import sys - - -if __name__ == "__main__": - - if len(sys.argv) < 3: - print ("Usage: python replace_half_calls.py in_file.cc half_out_file.cc \n") - sys.exit(0) - - file_name = sys.argv[1] - out_file_name = sys.argv[2] - - f = open(file_name) - str = f.read() - - str = str.replace("tensorConvolution", "tensorHalfConvolution") - str = str.replace("tensorAdd", "tensorHalfAdd") - str = str.replace("tensorRelu", "tensorHalfRelu") - str = str.replace("tensorRelu2", "tensorHalfRelu2") - str = str.replace("tensorTanh", "tensorHalfTanh") - str = str.replace("tensorPooling", "tensorHalfPooling") - str = str.replace("tensorGemmGPU", "tensorHalfGemmGPU") - - print (str) - - f.close() - - f2 = open(out_file_name, "w+") - - f2.write(str) - - f2.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py b/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py deleted file mode 100644 index 83956051bef2a868f7f685f3d471e5d5f84ac03d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path - -name_ranges = { - "canny_pareto": list(range(11, 28 + 1)), - "blend_pareto": list(range(11, 20 + 1)) -} -iters = 20 - -def run_binary(config_prefix, binary_file, vals): - from subprocess import run - from os import rename - from shutil import copy - from tqdm import tqdm - - out_dir = 
Path("run_data_{}".format(binary_file)) - out_dir.mkdir() - for i in tqdm(vals): - config = (config_prefix/"{}.txt".format(binary_file)).as_posix() - copy(config, "tuner_confs.txt") - with open("slowdowns.txt", 'w') as f: - f.write('\n'.join((str(i / 10) for _ in range(iters)))) - command = "./{} >out 2>&1".format(binary_file) - tqdm.write("{}; {}".format(command, i)) - run(command, shell=True, check=True) - out_path = (out_dir/"out{}".format(i)).as_posix() - profile_path = (out_dir/"profile_info_out{}.txt".format(i)).as_posix() - rename("out", out_path) - rename("profile_info_0.txt", profile_path) - # rename("final_accuracy", out_dir/"final_accuracy{}".format(i)) - - -def main(): - from sys import argv - - config_prefix = Path(argv[1]) - for binary_file, vals in name_ranges.items(): - print(binary_file) - run_binary(config_prefix, binary_file, vals) - - -if __name__ == "__main__": - main() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py b/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py deleted file mode 100644 index 898b4c4f42211e010b1544039cbd4b4125c03b92..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py +++ /dev/null @@ -1,89 +0,0 @@ - - -import argparse -import sys -import os - - -log_index = 7 -linear_index = 8 -quad_index = 9 - -top_k = 10 - -def dump_results(sorted_list, k, result_dir, sub_dir): - - ref_dir = result_dir + "/" + sub_dir - if not os.path.exists(ref_dir): - os.mkdir(ref_dir) - - for i in range(k): - file_name = sorted_list[i][1] - file_name = ref_dir + "/" + file_name + "_rank_" + str(i) - f = open(file_name, "w+") - f.write(str(sorted_list[i][2]) + "\t") - f.write(str(sorted_list[i][3]) + "\t") - f.write(str(sorted_list[i][4]) + "\n") - f.write(sorted_list[i][0]) - f.close() - - - - -def select_top_results(result_dir): - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - 
results_arr = [] - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - log_result = 0.0 - linear_result = 0.0 - quad_result = 0.0 - file_str = "" - - f = open(result_dir + "/" + file_name) - for x in f: - words = x.split() - log_result += float(words[log_index]) - linear_result += float(words[linear_index]) - quad_result += float(words[quad_index]) - file_str += x - - - file_result = (file_str, file_name, log_result, linear_result, quad_result) - results_arr.append(file_result) - - - sorted_list = sorted(results_arr, key = lambda tup: tup[2]) - dump_results(sorted_list, top_k, result_dir, "log") - - sorted_list = sorted(results_arr, key = lambda tup: tup[3]) - dump_results(sorted_list, top_k, result_dir, "linear") - - sorted_list = sorted(results_arr, key = lambda tup: tup[4]) - dump_results(sorted_list, top_k, result_dir, "quad") - - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - - args = argparser.parse_args() - result_dir = args.result_dir - - select_top_results(result_dir) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setupEnv.sh b/hpvm/projects/hpvm-tensor-rt/bin/setupEnv.sh deleted file mode 100644 index 58f16f20d0af12f041840b8037ae13e49c214ed4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setupEnv.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -module load cuda-toolkit/8.0 -export CUDNN_PATH=/software/cuda-toolkit-8.0/lib64/ -export LIBRARY_PATH=$LIBRARY_PATH:/software/cuda-toolkit-8.0/lib64/ - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_aws_paths.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_aws_paths.sh deleted file mode 100644 index d9f092a19f12a91bd588a356fc99744c14deb26a..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/bin/setup_aws_paths.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -# CUDNN Path setup -# module load cuda-toolkit/9.1 -export CUDA_INCLUDE_PATH=/usr/local/cuda/include -export CUDNN_PATH=/use/local/cuda/lib64/ -export LIBRARY_PATH=/usr/local/cuda/lib64/:$LIBRARY_PATH -#export LD_LIBRARY_PATH=/usr/local/cuda/lib64/:$LD_LIBRARY_PATH - -# HPVM Path setup -#export CPATH=$CPATH:/home/hsharif3/anaconda2/include/ -#export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH -#export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/ -#export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/ diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh deleted file mode 100644 index 9f45a76033c7e82728a2bdaf0f82d2bfe9230272..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# NOTE: Set Paths to local installation paths -# NOTE: Module cuda-took/9.1 not supported on non-EngrIT systems -module load cuda-toolkit/9.1 -export CUDA_INCLUDE_PATH=/software/cuda-9.1/include -export CUDNN_PATH=/software/cuda-9.1/lib64/ -export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH -export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh deleted file mode 100644 index b288ccfe43c577f9ad14c4eb16284539ae5682ea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh +++ /dev/null @@ -1,8 +0,0 @@ - -export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/ -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/ -export CUDNN_PATH=/usr/local/cuda-9.0/ -export CUDA_INCLUDE_PATH=${CUDNN_PATH}/include - -export LLVM_BUILD_ROOT=/home/nvidia/Gitlab/hpvm/build/ -export 
LLVM_SRC_ROOT=/home/nvidia/Gitlab/hpvm/llvm/ diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_paths.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_paths.sh deleted file mode 100644 index 446481b79a47827bf47341ce9d14f15f57d26866..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setup_paths.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -# Setting include path for Anaconda include files -export CPATH=$CPATH:/home/hsharif3/anaconda2/include/ -# Setting path for llvm/clang-4.0 build -export PATH=/home/hsharif3/Gitlab/llvm/llvm/build/bin/:$PATH - -export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/ - -export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/ diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh deleted file mode 100644 index 05db92cc08c8532ae5f83f6bdee15c12b8ed9159..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# CUDNN Path setup -module load cuda-toolkit/9.1 -export CUDA_INCLUDE_PATH=/software/cuda-9.1/include -export CUDNN_PATH=/software/cuda-9.1/lib64/ -export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH -export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py b/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py deleted file mode 100644 index b5c484a23029f97218500571ebb8bcafc718d430..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py +++ /dev/null @@ -1,304 +0,0 @@ - - -import os -import warnings -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from matplotlib.ticker import MultipleLocator -import numpy as np -from scipy.signal import savgol_filter -import math -import struct - - - -def readDataFromText(textFile): - results = [] - with open(textFile, "r") as f: - for line in f: - token = line.split("\t") - 
if (len(token) < 7): - continue - record = (token[0], float(token[1]), float(token[5]), float(token[6])) - results.append(record) - return results - - -convL1bins = [(0.985901, 1.36474), (0.852871, 1.16982), (0.422283, 0.55701), (0.259752, 0.335259), (0.216577, 0.277843), (0.185812, 0.23733), (0.148996, 0.189171), (0.100007, 0.125816), (0.0003127876261714846, 0.014511194080114365)] -convL2bins = [(0.995298, 1.3643), (0.861066, 1.16279), (0.426857, 0.547827), (0.262645, 0.330186), (0.218984, 0.273731), (0.187878, 0.233872), (0.150619, 0.186512), (0.10106, 0.124477), (0.00035427528200671077, 0.020199092105031013)] - -biasL1bins = [(0.3510325849056244, 0.49078235030174255), (0.30895063281059265, 0.4311973750591278), (0.16023841500282288, 0.22283604741096497), (0.099583700299263, 0.1381179839372635), (0.08340170979499817, 0.11503150314092636), (0.07280077040195465, 0.09948030859231949), (0.05857400223612785, 0.07965542376041412), (0.04044099152088165, 0.054193537682294846), (0.0, 0.0)] -biasL2bins = [(0.4154910147190094, 0.5820578932762146), (0.3656001389026642, 0.5121639370918274), (0.18930286169052124, 0.2637346684932709), (0.11687946319580078, 0.16306844353675842), (0.09796475619077682, 0.13558265566825867), (0.0848352462053299, 0.11619425565004349), (0.06783176958560944, 0.09277229756116867), (0.046059850603342056, 0.062238890677690506), (0.0, 0.0)] - -gemmL1bins= [(0.711203, 0.772211), (0.625894, 0.679601), (0.322665, 0.350383), (0.199646, 0.216727), (0.166556, 0.180781), (0.142945, 0.155132), (0.114662, 0.124399), (0.0771065, 0.0835984), (0.00034660729579627514, 0.008546584285795689)] -gemmL2bins= [(0.715208, 0.768102), (0.629411, 0.675947), (0.324433, 0.348358), (0.200659, 0.21539), (0.167381, 0.179634), (0.143637, 0.154119), (0.115197, 0.123548), (0.0774642, 0.0829647), (0.0003496285935398191, 0.009841435588896275)] - - - -def findBinByOp(op): - if op == 'tensorConv': - return convL1bins, convL2bins - if op == 'tensorAdd': - return biasL1bins, biasL2bins - if 
op == 'tensorGemm': - return gemmL1bins, gemmL2bins - - return None, None - - -def getSwing(Lx, opLxbin): - if opLxbin == None: - return 0 - for i, (minT, maxT) in enumerate(opLxbin): - if Lx > minT: - return i - - return 9 - - - -def getConfiguration(L_thresholds): - configuration = [] - for l in L_thresholds: - # L0 is op_type - opL1bin, opL2bin = findBinByOp(l[0]) - # NOTE: L2 is L1 error, L3 is L2 error - sL1 = getSwing(l[2], opL1bin) - sL2 = getSwing(l[3], opL2bin) - if sL1 < 7: - sL1 = sL1 + 1 - if sL2 < 7: - sL2 = sL2 + 1 - configuration.append((l[0], l[1], l[2], l[3], sL1, sL2, max(sL1, sL2))) - - return configuration - - -def displayConfig(config): - for c in config: - print(c) - -def displayMultipleConfigurations(configurations): - for f, c in configurations.items(): - print(f) - displayConfig(c) - print() - -def getConfigFromFile(filename): - L_requirements = readDataFromText(filename) - config = getConfiguration(L_requirements) - return config - - -def getConfigurationsFromDir(dirname): - configurations = dict() - for f in os.listdir(dirname): - configurations[f] = getConfigFromFile(os.path.join(dirname, f)) - - return configurations - - -def getLayerWiseTarget(config): - target = [] - for i, op in enumerate(config): - if (op[0] == 'tensorGemm') or (op[0] == 'tensorConv'): - t = op[6] - for j in range(i+1, len(config)): - if config[j][0] == 'tensorGemm' or config[j][0] == 'tensorConv': - break - t = max(t, config[j][6]) - target.append(t) - t = 0 - - return target - - -def dumpLayerWiseTarget(file, targets): - with open(file, "w") as f: - for name, t in targets.items(): - f.write(name) - f.write(" ") - for i in t: - f.write(str(i)) - f.write(" ") - f.write("\n") - - -def getTargetsFromConfigurations(configs): - targets = dict() - for f, c in configs.items(): - targets[f] = [d[6] for d in c] - - return targets - - -def dumpBenchmarkTargets(name, benchmark_dir): - benchmark_targets = dict() - error = ['linear', 'log', 'quad'] - for e in error: - 
results_dir = os.path.join(benchmark_dir, e) - configs = getConfigurationsFromDir(results_dir) - benchmark_targets[e] = getTargetsFromConfigurations(configs) - - return benchmark_targets - - - -def dumpTargets(filename, targets): - with open(filename, "w") as f: - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - for c in config: - f.write(str(c)) - f.write(" ") - f.write("\n") - - - -def getLayerSwings(layer_desc, configurations): - - layer_swings = [] - for i in range(len(configurations)): - config_vals = configurations[i] - if len(config_vals) == 0: - continue - - layer_index = 0 - index = 0 - swing_vals = [] - - while layer_index < len(layer_desc): - if len(layer_desc[layer_index]) == 1: - promise_swing = config_vals[index] - layer_type = layer_desc[layer_index][0] - layer_type = layer_type.strip() - print ("****layer_type = ", layer_type) - if layer_type != "conv" and layer_type != "dense": - promise_swing = -9 - if layer_type == "depthwise_conv": - promise_swing = 9 - index += 1 - else: - #print ("index = ", index) - # FIXIT: Doesn't look right - print (config_vals[index], config_vals[index+1]) - promise_swing = max(config_vals[index], config_vals[index+1]) - stride = len(layer_desc[layer_index]) - index += stride - - swing_vals.append(promise_swing) - layer_index += 1 - - layer_swings.append(swing_vals) - - return layer_swings - - - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -def dumpLayerTargets(targets, tuned_result_dir, layer_desc_file): - - layer_desc = loadLayerDesc(layer_desc_file) - print (layer_desc) - - file_names = [] - configurations = [] - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - config_vals = [] - for c in config: - config_vals.append(c) - print (config_vals) - - configurations.append(config_vals) - - rank = e + "_" + 
"_".join(name.split("_")[-2:]) - file_names.append(rank) - - - # NOTE: get PROMISE swing values corresponding to each layer - layer_swings = getLayerSwings(layer_desc, configurations) - - targets_file_path = tuned_result_dir + "/layer_targets.txt" - f = open(targets_file_path, "w+") - - for config in layer_swings: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - - f.write(swing_str) - index += 1 - - f.write("\n") - - f.close() - - print(layer_swings) - return layer_swings, file_names - - - -def replaceFirstLayer(layer_swings): - - # Ensuring first conv on GPU - for conf in layer_swings: - conf[0] = 9 - - - -def computeLayerTargets(tuned_result_dir, layer_desc_file): - - targets_file_path = tuned_result_dir + "/tensor_targets.txt" - targets = dumpBenchmarkTargets(targets_file_path, tuned_result_dir) - - dumpTargets(targets_file_path, targets) - - layer_swings, file_names = dumpLayerTargets(targets, tuned_result_dir, layer_desc_file) - - replaceFirstLayer(layer_swings) - - return layer_swings, file_names - - -# Externally-called function -def compute_swing_selection(tuned_result_dir, layer_file): - - return computeLayerTargets(tuned_result_dir, layer_file) - - - - -if __name__ == "__main__": - - tuned_result_dir = "./vgg16_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition.txt" - - tuned_result_dir = "./resnet18_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition2.txt" - computeLayerTargets(tuned_result_dir, layer_file) diff --git a/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh b/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh deleted file mode 100755 index 
f67f22ebad5352d99238addd26d9e1b568ee2125..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh +++ /dev/null @@ -1,2 +0,0 @@ -clang-4.0 -emit-llvm tensor_cpu_runtime.cc -S -o tensor_cpu_runtime.ll -opt-4.0 -always-inline tensor_cpu_runtime.ll -S -o tensor_cpu_runtime.ll diff --git a/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py b/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py deleted file mode 100644 index d0cde1e016fbbe67f9e98e43546bb3df38971f12..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py +++ /dev/null @@ -1,256 +0,0 @@ - - - - - - -class Benchmark: - def __init__(self): - self.binary_path = "" - self.binary_time = 0 - self.batch_time = 0 - self.num_layers = 0 - self.data_size = 0 - self.num_classes = 0 - self.batch_size = 50 - - - -ResNet50 = Benchmark() -ResNet50.binary_path = "resnet_imagenet" -ResNet50.binary_time = 3.85 * 100 # 50 images * 100 batches -ResNet50.batch_time = 3.85 # Time for batch with 50 images -ResNet50.num_layers = 53 -ResNet50.data_size = 50 * 3 * 224 * 224 * 4 # *4 for Float32 Data -ResNet50.num_classes = 1000 -ResNet50.batch_size = 50 - - - -ResNet18 = Benchmark() -ResNet18.binary_path = "resnet18_cifar10" -#ResNet50.binary_time = 5.1 * 60 # 5.1 mins * 60 secs/min -#ResNet18.binary_time = 12.9 # 50 images * 100 batches -#ResNet18.batch_time = 12.9 / 50 # Time for batch with 50 images - -# Updated numbers based on batch_size = 50 -- NOTE: Underutilizes GPU - this can be better -ResNet18.binary_time = 78 # 50 images * 100 batches -ResNet18.batch_time = 78 / 100 # Time for batch with 50 images - -ResNet18.num_layers = 21 -ResNet18.data_size = 50 * 3 * 32 * 32 * 4 # *4 for Float32 Data -ResNet18.num_classes = 10 -ResNet18.batch_size = 50 - - - -MobileNet = Benchmark() -MobileNet.binary_path = "mobilenet_cifar10" -MobileNet.binary_time = 103.0 # 50 images * 100 batches -MobileNet.batch_time = 103.0 / 100 # Time for 
batch with 50 images -MobileNet.num_layers = 15 -MobileNet.data_size = 50 * 3 * 32 * 32 * 4 # *4 for Float32 Data -MobileNet.num_classes = 10 -MobileNet.batch_size = 50 - - - -VGG16_ImageNet = Benchmark() -VGG16_ImageNet.binary_path = "vgg16_imagenet" -#VGG16_ImageNet.binary_time = 10.6 * 60 # 5.1 mins * 60 secs/min -VGG16_ImageNet.binary_time = 4.55 * 100 # 50 images * 100 batches -VGG16_ImageNet.batch_time = 4.55 -VGG16_ImageNet.num_layers = 16 -VGG16_ImageNet.data_size = 50 * 3 * 224 * 224 * 4 -VGG16_ImageNet.num_classes = 1000 -VGG16_ImageNet.batch_size = 50 - - -VGG16_CIFAR10 = Benchmark() -VGG16_CIFAR10.binary_path = "vgg16_cifar10" -#VGG16_CIFAR10.binary_time = 19.0 # 50 images * 100 batches -#VGG16_CIFAR10.batch_time = 19.0 /50 - -# Updated numbers based on batch_size = 50 -- NOTE: Underutilizes GPU - this can be better -VGG16_CIFAR10.binary_time = 55.7 # 50 images * 100 batches -VGG16_CIFAR10.batch_time = 55.7 / 100 - -VGG16_CIFAR10.num_layers = 15 -VGG16_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -VGG16_CIFAR10.num_classes = 10 -VGG16_CIFAR10.batch_size = 50 - - -VGG16_CIFAR100 = Benchmark() -VGG16_CIFAR100.binary_path = "vgg16_cifar100" -VGG16_CIFAR100.binary_time = 55.7 # 50 images * 100 batches -VGG16_CIFAR100.batch_time = 55.7 / 100 -VGG16_CIFAR100.num_layers = 15 -VGG16_CIFAR100.data_size = 50 * 3 * 32 * 32 * 4 -VGG16_CIFAR100.num_classes = 100 -VGG16_CIFAR100.batch_size = 50 - - - -AlexNet_ImageNet = Benchmark() -AlexNet_ImageNet.binary_path = "alexnet_imagenet" -AlexNet_ImageNet.binary_time = 0.7 * 100 -AlexNet_ImageNet.batch_time = 0.7 -AlexNet_ImageNet.num_layers = 8 -AlexNet_ImageNet.data_size = 50 * 3 * 224 * 224 * 4 -AlexNet_ImageNet.num_classes = 1000 -AlexNet_ImageNet.batch_size = 50 - - - -AlexNet_CIFAR10 = Benchmark() -AlexNet_CIFAR10.binary_path = "alexnet_cifar10" -AlexNet_CIFAR10.binary_time = 23.52 -AlexNet_CIFAR10.batch_time = 23.52 / 100 -AlexNet_CIFAR10.num_layers = 6 -AlexNet_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 
-AlexNet_CIFAR10.num_classes = 10 -AlexNet_CIFAR10.batch_size = 50 - - -AlexNet2_CIFAR10 = Benchmark() -AlexNet2_CIFAR10.binary_path = "alexnet2_cifar10" -AlexNet2_CIFAR10.binary_time = 27.1 -AlexNet2_CIFAR10.batch_time = 27.1 / 100 -AlexNet2_CIFAR10.num_layers = 7 -AlexNet2_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -AlexNet2_CIFAR10.num_classes = 10 -AlexNet2_CIFAR10.batch_size = 50 - - - -LeNet_CIFAR10 = Benchmark() -LeNet_CIFAR10.binary_path = "lenet_keras" -LeNet_CIFAR10.binary_time = 2.5 -LeNet_CIFAR10.batch_time = 2.5 / 50 -LeNet_CIFAR10.num_layers = 4 -LeNet_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -LeNet_CIFAR10.num_classes = 10 -LeNet_CIFAR10.batch_size = 50 - - - - - - -# 100 batches with batch size of 50 each -batch_count = 100 -promise_conf_runs = 30 # 30 runs for Statistical Confidence -promise_prof_runs = 10 # 10 runs for error profile collection -promise_knobs = 7 - -total_machines = 100 -total_confs = 50 -download_time_per_1MB = (6.1 * 60) / 100 # 6.1 mins over 4G LTE network for 100 MB data upload -upload_time_per_1MB = (26.4 * 60) / 100 # 26.4 mins over 4G LTE network for 100 MB data upload - - - - -def getErrorProfileTime(Bench): - - #time_per_batch = Bench.binary_time / batch_count - - time_per_batch = Bench.batch_time - - total_knobs = promise_knobs * Bench.num_layers - total_runs = total_knobs * promise_prof_runs - - promise_total_time = total_runs * time_per_batch - - fp16_total_time = Bench.num_layers * time_per_batch - - profile_time = promise_total_time + fp16_total_time - - return profile_time - - - - - -def getConfTime(Bench): - - conf_per_machine = promise_conf_runs * (total_confs * 1.0 / total_machines) - conf_time = conf_per_machine * Bench.binary_time - - return conf_time - - - - -def getNetworkTime(Bench): - - # Calibration Download Time - download_data_MB = Bench.data_size * 1.0 / 1000000 - download_data_time = download_data_MB * download_time_per_1MB - - # Profile Uploading (to Cloud Server) Time - total_knobs = (promise_knobs + 1) 
* Bench.num_layers - profile_size = total_knobs * Bench.batch_size * Bench.num_classes * 4 # *4 for FP32 data - - print (" ") - print ("--- profile_size = ", profile_size) - profile_size_MB = profile_size * 1.0 / 1000000 - upload_data_time = profile_size_MB * upload_time_per_1MB - - network_time = download_data_time + upload_data_time - - print( "network_time = ", download_data_time, upload_data_time, network_time) - return network_time - - - - -def getTimeOnEdge(Bench): - - err_time = getErrorProfileTime(Bench) - conf_time = getConfTime(Bench) - network_time = getNetworkTime(Bench) - - total_time = err_time + conf_time + network_time - total_time = total_time / 60 - - return total_time - - - - -if __name__ == "__main__": - - - resnet50_time = getTimeOnEdge(ResNet50) - print ("*** ResNet50 time (mins) = ", resnet50_time) - - resnet18_time = getTimeOnEdge(ResNet18) - print ("*** ResNet18 time (mins) = ", resnet18_time) - - - mobilenet_time = getTimeOnEdge(MobileNet) - print ("*** MobileNet time (mins) = ", mobilenet_time) - - - vgg16_img_time = getTimeOnEdge(VGG16_ImageNet) - print ("*** VGG16-Imagenet time (mins) = ", vgg16_img_time) - - vgg16_cifar10_time = getTimeOnEdge(VGG16_CIFAR10) - print ("*** VGG16-CIFAR10 time (mins) = ", vgg16_cifar10_time) - - vgg16_cifar100_time = getTimeOnEdge(VGG16_CIFAR100) - print ("*** VGG16-CIFAR100 time (mins) = ", vgg16_cifar100_time) - - alexnet_img_time = getTimeOnEdge(AlexNet_ImageNet) - print ("*** AlexNet-Imagenet time (mins) = ", alexnet_img_time) - - alexnet_cifar10_time = getTimeOnEdge(AlexNet_CIFAR10) - print ("*** AlexNet-CIFAR10 time (mins) = ", alexnet_cifar10_time) - - alexnet2_cifar10_time = getTimeOnEdge(AlexNet2_CIFAR10) - print ("*** AlexNet2-CIFAR10 time (mins) = ", alexnet2_cifar10_time) - - lenet_cifar10_time = getTimeOnEdge(LeNet_CIFAR10) - print ("*** LeNet-CIFAR10 time (mins) = ", lenet_cifar10_time) diff --git a/hpvm/projects/hpvm-tensor-rt/bin/times.py b/hpvm/projects/hpvm-tensor-rt/bin/times.py deleted 
file mode 100644 index 082b0d91acb19e70a6c217b25f8747f3197b45b7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/times.py +++ /dev/null @@ -1,78 +0,0 @@ - - - -class Config: - def __init__(self): - self.runtime = 0 - self.fed_runs = 0 - self.full_runs = 0 - - -def computeTimes(bench): - - conf_runs = 60 - fed_time = (bench.runtime * 100) + (bench.fed_runs * conf_runs * bench.runtime) - fed_time_hrs = fed_time / (60*60) - - full_time = (bench.runtime * 1000) + (bench.full_runs * conf_runs * bench.runtime) - full_time_hrs = full_time / (60*60) - - print ("fedtime_hrs = ", fed_time_hrs, " full_time_hrs = ", full_time_hrs, "\n") - - - -if __name__ == "__main__": - - - resnet = Config() - resnet.runtime = 8 - resnet.fed_runs = 3 - resnet.full_runs = 5 - - computeTimes(resnet) - - alexnet = Config() - alexnet.runtime = 7.8 - alexnet.fed_runs = 47 - alexnet.full_runs = 274 - - computeTimes(alexnet) - - alexnet2 = Config() - alexnet2.runtime = 2.3 - alexnet2.fed_runs = 62 - alexnet2.full_runs = 339 - - computeTimes(alexnet2) - - vgg1 = Config() - vgg1.runtime = 7.4 - vgg1.fed_runs = 15 - vgg1.full_runs = 211 - - computeTimes(vgg1) - - - vgg2 = Config() - vgg2.runtime = 15.4 - vgg2.fed_runs = 8 - vgg2.full_runs = 150 - - computeTimes(vgg2) - - - lenet = Config() - lenet.runtime = 0.98 - lenet.fed_runs = 64 - lenet.full_runs = 228 - - computeTimes(lenet) - - - mobilenet = Config() - mobilenet.runtime = 11 - mobilenet.fed_runs = 32 - mobilenet.full_runs = 267 - - computeTimes(mobilenet) - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py b/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py deleted file mode 100644 index 6fc680973783f700ed0297279a4ab5802c15e8ab..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py +++ /dev/null @@ -1,523 +0,0 @@ -from sys import stderr - -output_perf = list(range(21, 30 + 1)) -input_sampling = list(range(31, 36 + 1)) -red_sampling = 
list(range(41, 46 + 1)) -groups = { - "fp32": [11], - "fp16": [12], - "perf": output_perf, - "samp": input_sampling, - "red_samp": red_sampling -} -# 11 -> 1, 12 -> 1 -param_remap = { - 11: 1, 12: 1 -} -fp32_fp16_remap = { - 41: 42, 43: 44, 45: 46, 11: 12 -} -inv_group = {v: k for k, vs in groups.items() for v in vs} -speedups = { - 11: 1.0, - 12: 1.5, - 21: 2.25, - 22: 2.25, - 23: 1.88, - 24: 1.88, - 25: 1.88, - 26: 2.25, - 27: 2.25, - 28: 1.88, - 29: 1.88, - 30: 1.88, - 31: 2.25, - 32: 2.25, - 33: 1.8, - 34: 1.8, - 35: 1.8, - 36: 1.8, - 41: 1.5, - 42: 2.25, - 43: 1.4, - 44: 2, - 45: 1.25, - 46: 1.8 -} - - -def read_list_of_ops(template_filepath): - from re import match - from itertools import dropwhile, takewhile - with template_filepath.open() as f: - all_lines = [line.strip() for line in f.readlines()] - head_chopped = list(dropwhile( - (lambda line: match(r"\++", line) is None), all_lines))[1:] - tail_chopped = list(takewhile( - (lambda line: match(r"-+", line) is None), head_chopped)) - if not tail_chopped: - raise RuntimeError( - "Format error in file {}".format(template_filepath.as_posix()) - ) - op_lines = tail_chopped[1:] - ops = [line.split()[2] for line in op_lines] - return ops - - -def read_op_costs(filepath): - with filepath.open() as f: - return [float(line.strip()) for line in f.readlines()] - - -class Config(object): - def __init__(self, avg_acc, cost, speedup, values): - self.avg_acc, self.cost, self.speedup = avg_acc, cost, speedup - self.values = values - - @classmethod - def from_file(cls, filepath, ops): - from re import match - - with filepath.open() as f: - file_lines = f.readlines() - if not file_lines: - raise RuntimeError( - "Format error in file {}".format(filepath.as_posix())) - summary_line, config_lines = file_lines[0], file_lines[1:] - values = [int(l.strip()) for l in config_lines] - if len(values) != len(ops): - raise RuntimeError( - "Format error in file {}".format(filepath.as_posix())) - - # Summary line format: - # 
avg_accuracy=34.5229 config_cost=818.838299524 speedup=2.08307548754 - matched = match( - r"\s*avg_accuracy=([\d.]+)\s*config_cost=([\d.]+)\s*speedup=([\d.]+)\s*", summary_line - ) - avg_acc, cost, speedup = [float(matched.group(i)) for i in range(1, 4)] - - return cls(avg_acc, cost, speedup, values) - - @classmethod - def from_tuner_conf(cls, tuner_conf): - speedup = tuner_conf.speedup - cost = 0 - avg_acc = tuner_conf.acc - lines = [] - for _, approx_name, v in tuner_conf.lines: - replacements = groups.get(approx_name, []) - if len(replacements) == 1: - lines.append(replacements[0]) - else: - lines.append(v) - return cls(avg_acc, cost, speedup, lines) - - @staticmethod - def calculate_cost(flags, baseline_costs): - total_cost = 0 - for flag, cost in zip(flags, baseline_costs): - speedup = speedups.get(flag) - if speedup is None: - raise RuntimeError(f"Speedup of flag {flag} not given") - total_cost += cost / speedup - return total_cost - - def remap_to_fp16(self, baseline_costs): - remapped = [fp32_fp16_remap.get(v, v) for v in self.values] - if len(baseline_costs) != len(remapped): - raise RuntimeError( - "Provided baseline_costs does not map one-on-one to ops") - old_cost_match = self.calculate_cost(self.values, baseline_costs) - if abs(self.cost - old_cost_match) > 1e-2: - raise RuntimeError( - "Cost computation mismatch. 
Probably reading wrong costs " - "or speedup params have changed" - ) - new_cost = self.calculate_cost(remapped, baseline_costs) - speedup = (self.cost * self.speedup) / new_cost - return Config(self.avg_acc, new_cost, speedup, remapped) - - def __repr__(self): - head = f"avg_accuracy={self.avg_acc}\tconfig_cost={self.cost}\tspeedup={self.speedup}" - body = "\n".join((str(v) for v in self.values)) - return f"{head}\n{body}" - - -class TunerConf(object): - psnr_upper_bound = 200 - - def __init__(self, speedup, energy, acc, acc_loss, lines, seq_id=0): - self.speedup = speedup - self.energy = energy - self.acc = acc - self.acc_loss = acc_loss - for l in lines: - if len(l) != 3: - raise RuntimeError(f"Line {l} is malformed") - self.lines = lines - self.seq_id = seq_id - - @staticmethod - def get_baseline_conf(ops): - baseline = groups["fp32"][0] - baseline_config = Config( - avg_acc=TunerConf.psnr_upper_bound, - cost=0, # won't be used by TunerConf - speedup=1.0, - values=[baseline for _ in range(len(ops))] - ) - return TunerConf.from_config(ops, baseline_config, 0) - - @classmethod - def from_config(cls, ops, config, seq_id): - if len(ops) != len(config.values): - raise RuntimeError( - f"Number of ops mismatch in {ops} and {config.values}" - ) - lines = [] - for o, v in zip(ops, config.values): - approx_name = inv_group.get(v) - if approx_name is None: - raise RuntimeError(f"Promise flag {v} is not understood") - lines.append((o, approx_name, v)) - return cls( - speedup=config.speedup, energy=1.0, - acc=config.avg_acc, acc_loss=cls.psnr_upper_bound - config.avg_acc, - lines=lines, seq_id=seq_id - ) - - @classmethod - def many_from_file(cls, filepath): - def maybe_int(value, default=None): - try: - return int(value) - except ValueError: - return None - - import re - - with filepath.open() as f: - file_lines = f.read() - tuner_confs = [] - for match in re.finditer(r"\++\n([^-]*)\n\-+", file_lines, re.MULTILINE): - meta, *config_lines = match.group(1).split('\n') - _, 
*stats = meta.split(' ') - speedup, energy, acc, acc_loss = [float(s) for s in stats] - configs = [] - for line in config_lines: - _, _, op, approx, param = line.split(' ') - param = maybe_int(param, 1) - configs.append((op, approx, param)) - tuner_confs.append(cls(speedup, energy, acc, acc_loss, configs)) - return tuner_confs - - def __repr__(self): - def repr_line(idx, line): - op, approx, param = line - param = param_remap.get(param, param) - return f"{idx + 1} gpu {op} {approx} {param}\n" - - head = ( - f"+++++\nconf{self.seq_id} {self.speedup:.4f} {self.energy:.4f} " - f"{self.acc:.4f} {self.acc_loss:.4f}\n" - ) - tail = "-----" - printed_lines = "".join(( - repr_line(i, line) for i, line in enumerate(self.lines) - )) - return head + printed_lines + tail - - -def parse_config(filepath, ops, op_counter, config_summaries): - config = Config.from_file(filepath, ops) - config_summaries.append((config.speedup, config.avg_acc)) - for v, name in zip(config.values, ops): - v_group = inv_group.get(v) - op_counter[name][v_group] += 1 - - -def plot_pareto_stats(pareto, others, save_to): - import matplotlib.pyplot as plt - - if not pareto and not others: - return - p_xs, p_ys = zip(*pareto) if pareto else ([], []) - o_xs, o_ys = zip(*others) if others else ([], []) - scale = 10 - alpha = 1 - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.scatter(p_xs, p_ys, c="green", label="pareto", s=scale, alpha=alpha) - ax.scatter(o_xs, o_ys, c="red", label="non-pareto", s=scale, alpha=alpha) - ax.set_xlabel("speedup") - ax.set_ylabel("avg_psnr") - ax.legend() - fig.savefig(save_to, dpi=200) - - -def scan_config_dirs(configs_base_dir, ops): - from collections import Counter - - all_configs_dir = configs_base_dir/"high_confidence" - pareto_dir = configs_base_dir/"pareto" - if not pareto_dir.is_dir(): - print( - "No pareto directory found at {}; skipping".format( - pareto_dir.as_posix()), - file=stderr - ) - pareto_confs = set() - else: - pareto_confs = set((p.name for p in 
pareto_dir.iterdir())) - - counters = {name: Counter() for name in set(ops)} - pareto_summaries, other_summaries = [], [] - for filepath in all_configs_dir.iterdir(): - filename = filepath.name - if filename in pareto_confs: - filepath = pareto_dir / filename - parse_config(filepath, ops, counters, pareto_summaries) - else: - parse_config(filepath, ops, counters, other_summaries) - - return pareto_summaries, other_summaries, counters - - -def translate_configs(configs_base_dir, ops): - from pathlib import Path - - pareto_dir = configs_base_dir/"pareto" - output_file = configs_base_dir/"tuner_confs.txt" - baseline = str(TunerConf.get_baseline_conf(ops)) - tuner_conf_strs = [baseline] - for i, config_path in enumerate(pareto_dir.iterdir()): - config = Config.from_file(config_path, ops) - tuner_conf = TunerConf.from_config(ops, config, i + 1) - tuner_conf_strs.append(str(tuner_conf)) - with output_file.open('w') as f: - print("\n".join(tuner_conf_strs), file=f) - - -def print_stats(args): - from pprint import pprint - - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") - pareto, others, counters = scan_config_dirs(args.configs, ops) - if pareto: - plot_pareto_stats(pareto, others, args.configs/"pareto.png") - translate_configs(args.configs, ops) - pprint(counters) - - -def run_binary(bin_path): - import subprocess - import os - - fnull = open(os.devnull, 'wb') - p = subprocess.Popen(["./" + bin_path], stdout=fnull) - p.wait() - if p.returncode != 0: - # Something went wrong - print( - "Child program returned non-zero; you may want to stop and check.", - file=stderr - ) - - -def getPSNR(file_name): - with open(file_name) as f: - try: - raw_str = f.read() - violation, avg_psnr = [float(s) for s in raw_str.split(",")] - except: - return None, None - return 100 - violation, avg_psnr - - -def run_validation(args): - from pathlib import Path - from shutil import copy - from tqdm import tqdm - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") 
- binary = Path(args.binary).resolve() - dump_path = args.dump_violation - if dump_path is not None and not dump_path.is_dir(): - dump_path.mkdir() - configs = [p for p in args.configs.iterdir() if p.is_file()] - for config_path in tqdm(configs): - config = Config.from_file(config_path, ops) - promise_flags = binary.parent / "promise_flags" - with promise_flags.open('w') as f: - f.writelines((f"{v}\n" for v in config.values)) - run_binary(args.binary) - success_rate, avg_psnr = getPSNR("final_accuracy") - tqdm.write( - f"config: {config_path.as_posix()}, " - f"success_rate = {success_rate}, " - f"avg_psnr = {config.avg_acc} -> {avg_psnr}" - ) - if success_rate < args.threshold: - tqdm.write( - ( - "WARNING: config {} violates threshold on vaildation set; " - "success_rate = {}, avg_psnr = {}" - ).format(config_path, success_rate, avg_psnr), - file=stderr - ) - if dump_path is not None: - conf_name = config_path.name - copy(config_path.as_posix(), dump_path / conf_name) - - -def remap_configs(args): - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") - costs = read_op_costs(args.bench_info/"op_cost.txt") - output_folder = args.configs.resolve().parent / "remapped" - if not output_folder.is_dir(): - output_folder.mkdir() - for config_path in args.configs.iterdir(): - config = Config.from_file(config_path, ops) - old_speedup = config.speedup - config = config.remap_to_fp16(costs) - print(f"speedup: {old_speedup} -> {config.speedup}") - output_path = output_folder / config_path.name - with output_path.open('w') as f: - f.write(str(config)) - print( - "Finished.\n" - "Average psnr in files are not calibrated as it's impossible " - "without rerunning. 
Make sure to rerun the remapped configs.", - file=stderr - ) - - -def plot_compare_pareto(args): - import matplotlib.pyplot as plt - import numpy as np - - org = TunerConf.many_from_file(args.original) - cali = TunerConf.many_from_file(args.calibrated) - org, cali = org[1:], cali[1:] # remove baseline - if not org and not cali: - return - o_xs, o_ys = [tc.speedup for tc in org], [tc.acc for tc in org] - c_xs, c_ys = [tc.speedup for tc in cali], [tc.acc for tc in cali] - - scale = 10 - fig = plt.figure() - - ax1 = fig.add_subplot(211) - ax1.scatter(o_xs, o_ys, c="red", label="predicted", s=scale) - ax1.scatter(c_xs, c_ys, c="green", label="calibrated", s=scale) - ax1.set_xlabel("speedup") - ax1.set_ylabel("avg_psnr") - ax1.legend() - - ax2 = fig.add_subplot(212) - ax2.scatter(c_ys, np.array(c_xs) - np.array(o_xs), s=scale) - ax2.set_xlabel("avg_psnr") - ax2.set_ylabel("diff_speedup") - - fig.savefig(args.output.as_posix(), dpi=200) - - -def inv_translate(args): - tuner_confs = TunerConf.many_from_file(args.file)[1:] - configs = [Config.from_tuner_conf(tc) for tc in tuner_confs] - args.output_path.mkdir(exist_ok=True) - output = args.output_path/"high_confidence" - output.mkdir(exist_ok=True) - for i, conf in enumerate(configs): - with (output/f"{args.file.stem}_{i}").open('w') as f: - f.write(str(conf)) - - -def parse_args(): - import argparse - from pathlib import Path - - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers( - description="Valid subcommands", required=True, dest="subcommand" - ) - - stats_p = subparsers.add_parser( - "stats", help="Print out stats of a set of configs") - stats_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - stats_p.add_argument( - "configs", type=Path, - help="Configs folder. 
Should contain high_confidence (and optionally pareto) subfolders" - ) - stats_p.set_defaults(func=print_stats) - - cali_p = subparsers.add_parser( - "print_cali", help="Plot calibrated + original pareto curves") - cali_p.add_argument( - "original", type=Path, help="Original pareto curve" - ) - cali_p.add_argument( - "calibrated", type=Path, help="Calibrated pareto curve" - ) - cali_p.add_argument( - "-o", "--output", default="comparison.png", - type=Path, help="Path to output image" - ) - cali_p.set_defaults(func=plot_compare_pareto) - - ref_p = subparsers.add_parser( - "validation", help="Run validation on validation set(s)" - ) - ref_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - ref_p.add_argument("binary", type=str, help="Path to binary") - ref_p.add_argument( - "configs", type=Path, help="Path to folder of configs to run" - ) - ref_p.add_argument( - "-t", "--threshold", type=float, default=95.0, - help="Threshold of violation rate below which the test fails" - ) - ref_p.add_argument( - "-o", "--dump_violation", type=Path, help="Place to dump violating configs" - ) - ref_p.set_defaults(func=run_validation) - - remap_p = subparsers.add_parser( - "remap", help="Remap fp32 to fp16" - ) - remap_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - remap_p.add_argument( - "configs", type=Path, help="Path to folder of configs to remap" - ) - remap_p.set_defaults(func=remap_configs) - - trans_p = subparsers.add_parser( - "translate", help="Translate tuner conf back to autotuner format" - ) - trans_p.add_argument( - "file", type=Path, help="Input file (one)" - ) - trans_p.add_argument( - "output_path", type=Path, help="Output folder" - ) - trans_p.set_defaults(func=inv_translate) - - return parser.parse_args() - - -def main(): - args = parse_args() - args.func(args) - - -if __name__ == "__main__": - main() diff --git 
a/hpvm/projects/hpvm-tensor-rt/bin/tuner_src b/hpvm/projects/hpvm-tensor-rt/bin/tuner_src deleted file mode 120000 index f24dde48b6f885fd3783f453f514546e6e4a4ed1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tuner_src +++ /dev/null @@ -1 +0,0 @@ -../autotuner/tuner_driver_src/ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h index 500ff63bc86dce6cae0dee3f942639c07bf14ab3..5d1e0e66ad1a3402981682ed97e664ddcc173787 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h @@ -13,9 +13,11 @@ #include <tensor_runtime.h> #include <tensor.h> #include <cmath> +#include <string.h> std::vector<float> run_accuracies; +std::string model_params_path = "../../../build/model_params/"; void printTensorInfo(void* tensor_ptr){ diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_canny.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_canny.cc deleted file mode 100644 index 628ce6616cde37a5eddde5ab6049001525203580..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_canny.cc +++ /dev/null @@ -1,255 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <vector> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - -#include "tensor_custom_ops_cpu.h" - - - - -Tensor* gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < 
w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -std::pair<Tensor*, Tensor*> getSobelKernels() { - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - return std::make_pair(t1, t2); -} - -/*** - -TODOs: - -* Precision calculation? -* tensorArgMax? -* tensorSelect? -* tensorContract -* autotuning support for these functions -* FP32 vs F16 versions of sampling perforation? -* Need tensorRT version and a PROMISE API version -* How to Profile? are profileEvent calls added -* Pytorch version - - -****/ - -void* canny_filter(void* dataset) { - - Tensor* gaussian = gaussianFilter(1.4, 5, 5, 1); - Tensor* kernel_x, *kernel_y; - std::tie(kernel_x, kernel_y) = getSobelKernels(); - - // 0. Grayscale - auto* summed_image = tensorReduce(dataset, 1, MathOp::Add); - auto* grayscale_image = tensorMap1(MathOp::Avg3, summed_image); - // 1. Denoise - - auto* image2 = tensorConvolution(grayscale_image, gaussian, - 2, 2, // padding - 1, 1, // strides - 1, 0); // conv_mode, conv_groups - - // 2. Get edge gradient / direction - auto *grad_x = tensorConvolution(image2, kernel_x, - 1, 1, - 1, 1, - 1, 0); - - auto *grad_y = tensorConvolution(image2, kernel_y, - 1, 1, - 1, 1, - 1, 0); - - auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y); - // 2.5. 
Normalize grad magnitude - auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max); - auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max); - auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max); - return grad_mag_norm; -} - - - - -void* invoke_canny(void* input) { - - auto* result = canny_filter(input); - - printf("Done with Canny \n"); - - return result; -} - - - - - - - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string canny_input_path = dir_prefix + std::string("canny_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = 
readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int test_input_size = 5000; - int batch_size = 500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - startMemTracking(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - void* canny_input = readInputBatch(canny_input_path.c_str(), 0,start,end, 3, 128, 128); - - void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorTanh(conv1out); - - // 2nd Layer - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorTanh(conv2out); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorTanh(conv4out); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - 
// 5th Layer - void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorTanh(conv5out); - - // 6th Layer - void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv6out, conv6_bias); - - void* conv6_tanh = tensorTanh(conv6out); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - - std::vector<int> index_vector; - index_vector.push_back(1); - index_vector.push_back(2); - index_vector.push_back(3); - index_vector.push_back(4); - index_vector.push_back(5); - - - void* argmax_out = tensorArgMax(result); - void* select_out = tensorSelect2(argmax_out, index_vector); - void* reduced_input = tensorContract(canny_input, select_out); - - invoke_canny(reduced_input); - - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet2_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet2_cifar10_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc index 161cdd249cc1e94f0a739772e0b9b9ea86993be8..d93110945b1d1a70ec29c7788d9133dc16551ee5 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet2_cifar10_half.cc +++ 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc @@ -18,7 +18,7 @@ void testCifarNet(){ printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/alexnet2_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string labels32_path = dir_prefix + std::string("labels32.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet_cifar10_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc index 8a429862f34f95793dd9ca7caa619b10dbe568ab..b7695bbd7a24712e335f0cf8bbd25290f3261dea 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/alexnet_cifar10_half.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc @@ -14,7 +14,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc index f04ec041644394e2258414575162b961f9849667..29f392c630a36a6044c5f804e5d3a7b252591831 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc +++ 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc @@ -21,7 +21,7 @@ void testLenetTanh(){ int test_batch_size = 5000; - std::string dir_prefix = std::string("../model_params/lenet_mnist/"); + std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc index dabafd4345f29d00c7271c796a8497aba8b7772d..d662dc1584c7810d8d3631d5ac16c427c3ff8b02 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_half.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc @@ -14,7 +14,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/mobilenet/"); + std::string dir_prefix = model_params_path + std::string("/mobilenet/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/resnet18_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/resnet18_cifar10_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc index 9779b95d865d1939244f50c3910d7ed770b0729d..741c4a443cc9a56c443ec5858aaed5a7d5705268 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/resnet18_cifar10_half.cc +++ 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/resnet18_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar100_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar100_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc index 7107defe9d154731a46efaf5c8ad244ceb69bad7..9ac1deea68c693f8baf2df2d9f2b626b3597ad7f 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar100_half.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); + std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar10_half.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc index 45e74fbe32e053e2d43c1dde0f90460c21ab0118..f92bac10e27162fe0bc59c07aa4f9ede542ccd6e 100644 --- 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/vgg16_cifar10_half.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc index 5918f4f18ebdb7d4f2fa3e37c0982b8ed8d10932..50d9747f990d486c4543607d16d4a4ccb88b0517 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc @@ -19,7 +19,7 @@ void testCifarNet(){ printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/alexnet2_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string labels32_path = dir_prefix + std::string("labels32.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc index 
8129fbfafcdd3e991e67d33fd3013e1700da45c5..1a76f1ae8ba6059124117b82cd72e8ccd6cdeba6 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc similarity index 100% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_imagenet.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc index c047ffe090a93711cb66973ef6622d46fccdcee3..7508f3119eeb469a164fad9741000308e3e8c031 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc @@ -22,7 +22,7 @@ void testLenetTanh(){ int test_batch_size = 5000; - std::string dir_prefix = std::string("../model_params/lenet_mnist/"); + std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc similarity index 99% rename from 
hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc index 78ca2dac98435ea146da44a78bb2f7405af8c5ef..7c311a568647caa107112bed4982fb57254dc7b3 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc @@ -15,7 +15,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/mobilenet/"); + std::string dir_prefix = model_params_path + std::string("/mobilenet/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet18_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet18_cifar10.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc index b0c868085bae1abc2025364609114cc21c7d213a..87b8cd4156ed8d7f882ff7642420c995cd7c3a0f 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet18_cifar10.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(1); - std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/resnet18_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet50_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc similarity index 99% rename from 
hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet50_imagenet.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc index 1192d04de200c8e8183c35861da2d04aa705e955..0914b3f70c353ee7e56c39ccf52f21914618301e 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/resnet50_imagenet.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc @@ -15,7 +15,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet_tune/"); + std::string dir_prefix = model_params_path + std::string("/shared/hsharif3/resnet50_imagenet/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar10.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc index e8469e8a4892f51337118e4699f09ae98c13bf71..a6dc7cbc11cf77357a749bff117489fc4b292941 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar10.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); + std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc similarity index 99% rename from 
hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc index 0290a2782880c1aa8c1ea33f5564926665d968d6..2539f8d8722909724a9dc2890e82f4f98853f5cd 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100.cc +++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc @@ -13,7 +13,7 @@ int main(){ llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); + std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc similarity index 100% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_imagenet.cc rename to hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_shallow_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_shallow_half.cc deleted file mode 100644 index 7ce9a90e10697c979adc470345244a2cc326f0cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/mobilenet_shallow_half.cc +++ /dev/null @@ -1,235 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string 
conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); 
- void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = 
readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - 
std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + 
std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* 
batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = 
readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = 
readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 2000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - - - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorHalfRelu(var_8); - void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorHalfRelu(var_12); - void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = 
tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorHalfRelu(var_15); - void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorHalfRelu(var_19); - void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorHalfRelu(var_22); - void* var_26 = tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorHalfRelu(var_27); - void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorHalfRelu(var_34); - void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorHalfRelu(var_37); - void* var_41 = 
tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorHalfRelu(var_42); - void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorHalfRelu(var_45); - void* var_47 = tensorHalfPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorHalfGemmGPU(var_47, dense_1_w); - void* var_50 = tensorHalfAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc deleted file mode 100644 index 82fe03247f36dbe6de31205a60344b7f44f85bad..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc +++ /dev/null @@ -1,169 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -/* NOTE: Reference Architecture to use for profiling */ 
-void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // 
NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - - // NOTE: Starting time profiling - startProfiling(); - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - for (int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* conv1out = tensorHalfConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorHalfTanh(conv1out); - - // 2nd Layer - void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorHalfTanh(conv2out); - void* pool2out = tensorHalfPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 3rd Layer - void* conv3out = tensorHalfConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorHalfTanh(conv3out); - - // 4th Layer - void* conv4out = tensorHalfConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorHalfTanh(conv4out); - void* pool4out = tensorHalfPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 5th Layer - void* conv5out = tensorHalfConvolution(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorHalfTanh(conv5out); - - // 6th Layer - void* conv6out = tensorHalfConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1, - 
conv_mode, conv_precision); - tensorHalfAdd(conv6out, conv6_bias); - - void* conv6_tanh = tensorHalfTanh(conv6out); - void* pool6out = tensorHalfPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorHalfGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc deleted file mode 100644 index 965e3170ea5c9df7dec1abe13d06581fe56f3b21..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc +++ /dev/null @@ -1,126 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include 
"../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); - void* conv2d_5_b = 
readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - for (int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); - void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); - void* var_2 = tensorHalfTanh(var_1); - void* var_3 = tensorHalfPooling(var_2,0,2,2,0,0,2,2); - void* var_5 = tensorHalfConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); - void* var_6 = tensorHalfAdd(var_5, conv2d_2_b); - void* var_7 = tensorHalfTanh(var_6); - void* var_8 = tensorHalfPooling(var_7,0,2,2,0,0,2,2); - void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); - void* var_12 = tensorHalfTanh(var_11); - void* var_13 = tensorHalfConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_14 = tensorHalfAdd(var_13, conv2d_4_b); - void* var_15 = tensorHalfTanh(var_14); - void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); - void* var_18 = tensorHalfTanh(var_17); - void* var_19 = tensorHalfPooling(var_18,0,2,2,0,0,2,2); - void* var_22 = 
tensorHalfGemmGPU(var_19, dense_1_w); - void* var_23 = tensorHalfAdd(var_22, dense_1_b); - void* var_24 = tensorSoftmax(var_23); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_24); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/lenet_keras_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/lenet_keras_half_profiling.cc deleted file mode 100644 index e6ffd6b03de4901780511e56afdb5faac85bb807..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/lenet_keras_half_profiling.cc +++ /dev/null @@ -1,186 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 10; - - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 
images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - float final_accuracy = 0.0; - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - 
readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - profiler.resume_profiler(); - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - - void* conv1_tanh = tensorHalfTanh(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - - void* conv2_tanh = tensorHalfTanh(pool2out); - - void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights); - - void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorHalfTanh(gemm1biasout); - - void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorHalfTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - // End profiling and dump output to profile.txt - stopProfiling(); - - float accuracy = computeAccuracy2(labels, test_batch_size, result); - final_accuracy += accuracy; - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid 
descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / total_runs; - dumpFinalAccuracy(final_accuracy); -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc deleted file mode 100644 index 641047b50dc1219f1d02bbfb75e2014840c90d96..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc +++ /dev/null @@ -1,416 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = 
readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string 
batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = 
dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* 
batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = 
readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 
0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string 
batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + 
std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + 
std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); 
- void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = 
readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = 
readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = 
readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = 
readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* 
batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - - int total_runs = 10; - float final_accuracy = 0.0; - - for (int run_num = 0; run_num < total_runs; run_num++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 
1); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorHalfRelu(var_8); - void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorHalfRelu(var_12); - void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorHalfRelu(var_15); - void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorHalfRelu(var_19); - void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorHalfRelu(var_22); - void* var_26 = 
tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorHalfRelu(var_27); - void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorHalfRelu(var_34); - void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorHalfRelu(var_37); - void* var_41 = tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorHalfRelu(var_42); - void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorHalfRelu(var_45); - void* var_48 = tensorHalfConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, 
batch_normalization_14_variance, 0.001); - void* var_50 = tensorHalfRelu(var_49); - void* var_51 = tensorHalfConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorHalfRelu(var_52); - void* var_55 = tensorHalfConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorHalfBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorHalfRelu(var_56); - void* var_58 = tensorHalfConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_59 = tensorHalfBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorHalfRelu(var_59); - void* var_63 = tensorHalfConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_65 = tensorHalfRelu(var_64); - void* var_66 = tensorHalfConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_68 = tensorHalfRelu(var_67); - void* var_70 = tensorHalfConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorHalfBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 = tensorHalfRelu(var_71); - void* var_73 = tensorHalfConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = 
tensorHalfBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_75 = tensorHalfRelu(var_74); - void* var_77 = tensorHalfConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorHalfBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorHalfRelu(var_78); - void* var_80 = tensorHalfConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorHalfBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorHalfRelu(var_81); - void* var_85 = tensorHalfConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorHalfBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorHalfRelu(var_86); - void* var_88 = tensorHalfConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorHalfBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorHalfRelu(var_89); - void* var_92 = tensorHalfConvCutlass(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorHalfBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorHalfRelu(var_93); - void* var_95 = tensorHalfConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorHalfBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorHalfRelu(var_96); 
- void* var_99 = tensorHalfPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorHalfGemmGPU(var_99, dense_1_w); - void* var_102 = tensorHalfAdd(var_101, dense_1_b); - void* var_103 = tensorSoftmax(var_102); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_103); - final_accuracy += accuracy; - freeBatchMemory(); - } - } - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_half_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_half_cifar10_profiling.cc deleted file mode 100644 index 1c6a3955b1ad644363947106bb0f77d6b9a77050..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_half_cifar10_profiling.cc +++ /dev/null @@ -1,438 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string 
batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + 
std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* 
conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = 
readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - 
std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + 
std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + 
std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
- void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = 
readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = 
readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = 
readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = 
readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = 
readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = 
readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* 
batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - startProfiling(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = 
readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorHalfRelu(var_8); - void* var_11 = tensorHalfConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorHalfRelu(var_12); - void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorHalfRelu(var_15); - void* var_18 = tensorHalfConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorHalfRelu(var_19); - void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, 
batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorHalfRelu(var_22); - void* var_26 = tensorHalfConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorHalfRelu(var_27); - void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorHalfRelu(var_34); - void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorHalfRelu(var_37); - void* var_41 = tensorHalfConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorHalfRelu(var_42); - void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorHalfRelu(var_45); - void* var_48 = tensorHalfConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 
1, 1, 1, 512); - void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_50 = tensorHalfRelu(var_49); - void* var_51 = tensorHalfConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorHalfRelu(var_52); - void* var_55 = tensorHalfConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorHalfBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorHalfRelu(var_56); - void* var_58 = tensorHalfConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_59 = tensorHalfBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorHalfRelu(var_59); - void* var_63 = tensorHalfConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_65 = tensorHalfRelu(var_64); - void* var_66 = tensorHalfConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_68 = tensorHalfRelu(var_67); - void* var_70 = tensorHalfConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorHalfBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 
= tensorHalfRelu(var_71); - void* var_73 = tensorHalfConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorHalfBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_75 = tensorHalfRelu(var_74); - void* var_77 = tensorHalfConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorHalfBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorHalfRelu(var_78); - void* var_80 = tensorHalfConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorHalfBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorHalfRelu(var_81); - void* var_85 = tensorHalfConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorHalfBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorHalfRelu(var_86); - void* var_88 = tensorHalfConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorHalfBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorHalfRelu(var_89); - void* var_92 = tensorHalfConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorHalfBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorHalfRelu(var_93); - void* var_95 = tensorHalfConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorHalfBatchNorm(var_95, batch_normalization_27_gamma, 
batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorHalfRelu(var_96); - void* var_99 = tensorHalfPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorHalfGemmGPU(var_99, dense_1_w); - void* var_102 = tensorHalfAdd(var_101, dense_1_b); - void* var_103 = tensorSoftmax(var_102); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_103); - final_accuracy += accuracy; - freeBatchMemory(); - - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc deleted file mode 100644 index f68eb1793b66b0579f2ed6dbff26a56677f2aa95..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc +++ /dev/null @@ -1,249 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - - -int main(int argc, char* argv[]){ - - int total_runs = 10; - if (argc > 1){ - total_runs = 
atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(0); - - //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/"); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = 
readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - 
std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + 
std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* 
depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = 
readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = 
readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = 
readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - - - float final_accuracy = 0.0; - - for(int j = 0; j < total_runs; j++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, 
batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorHalfRelu(var_8); - void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorHalfRelu(var_12); - void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorHalfRelu(var_15); - void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorHalfRelu(var_19); - void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorHalfRelu(var_22); - void* var_26 = tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorHalfRelu(var_27); - void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = 
tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorHalfRelu(var_34); - void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorHalfRelu(var_37); - void* var_41 = tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorHalfRelu(var_42); - void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorHalfRelu(var_45); - void* var_47 = tensorHalfPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorHalfGemmGPU(var_47, dense_1_w); - void* var_50 = tensorHalfAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - //final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy / batch_count); - } - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_half_profiling.cc deleted file 
mode 100644 index c641db1a05efe44d4801da1ebdcaf2ae8945e7f2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/mobilenet_shallow_half_profiling.cc +++ /dev/null @@ -1,225 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* 
depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = 
readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); - std::string 
batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + 
std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* 
batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = 
readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, 
batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorHalfRelu(var_8); - void* var_11 = tensorHalfConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorHalfRelu(var_12); - void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorHalfRelu(var_15); - void* var_18 = tensorHalfConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorHalfRelu(var_19); - void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorHalfRelu(var_22); - void* var_26 = tensorHalfConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorHalfRelu(var_27); - void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = 
tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorHalfRelu(var_34); - void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorHalfRelu(var_37); - void* var_40 = tensorHalfPooling(var_38,1,2,2,0,0,2,2); - void* var_42 = tensorHalfGemmGPU(var_40, dense_1_w); - void* var_43 = tensorHalfAdd(var_42, dense_1_b); - void* var_44 = tensorSoftmax(var_43); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_44); - final_accuracy += accuracy; - freeBatchMemory(); - } - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc deleted file mode 100644 index 
f91814e8390a400159467298a3702147cbf2f4b3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc +++ /dev/null @@ -1,242 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = 
readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - 
void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string 
conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / 
batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; //100; - - // NOTE: Starting time profiling - startProfiling(); - - Profiler profiler; - profiler.start_profiler(); - double total_time = 0.0; - - for (int itrs = 0; itrs < total_runs; itrs++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_2 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_3 = tensorHalfAdd(var_2, conv2d_1_b); - void* var_4 = tensorHalfRelu(var_3); - void* var_6 = tensorHalfConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_7 = tensorHalfAdd(var_6, conv2d_2_b); - void* var_8 = tensorHalfRelu(var_7); - void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); - void* var_12 = tensorHalfAdd(var_4, var_11); - void* var_13 = tensorHalfRelu(var_12); - void* var_15 = tensorHalfConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_16 = tensorHalfAdd(var_15, conv2d_4_b); - void* var_17 = tensorHalfRelu(var_16); - void* var_19 = tensorHalfConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_20 = tensorHalfAdd(var_19, conv2d_5_b); - void* var_21 = tensorHalfAdd(var_13, var_20); - void* var_22 = tensorHalfRelu(var_21); - void* var_24 = tensorHalfConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorHalfAdd(var_24, conv2d_6_b); - void* var_26 = tensorHalfRelu(var_25); - void* var_28 = tensorHalfConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorHalfAdd(var_28, conv2d_7_b); - void* var_30 = tensorHalfAdd(var_22, var_29); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); - void* var_34 = tensorHalfAdd(var_33, conv2d_8_b); - void* var_35 = tensorHalfRelu(var_34); - void* var_37 = 
tensorHalfConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_38 = tensorHalfAdd(var_37, conv2d_9_b); - void* var_40 = tensorHalfConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); - void* var_41 = tensorHalfAdd(var_40, conv2d_10_b); - void* var_42 = tensorHalfAdd(var_41, var_38); - void* var_43 = tensorHalfRelu(var_42); - void* var_45 = tensorHalfConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_46 = tensorHalfAdd(var_45, conv2d_11_b); - void* var_47 = tensorHalfRelu(var_46); - void* var_49 = tensorHalfConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_50 = tensorHalfAdd(var_49, conv2d_12_b); - void* var_51 = tensorHalfAdd(var_43, var_50); - void* var_52 = tensorHalfRelu(var_51); - void* var_54 = tensorHalfConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_55 = tensorHalfAdd(var_54, conv2d_13_b); - void* var_56 = tensorHalfRelu(var_55); - void* var_58 = tensorHalfConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); - void* var_59 = tensorHalfAdd(var_58, conv2d_14_b); - void* var_60 = tensorHalfAdd(var_52, var_59); - void* var_61 = tensorHalfRelu(var_60); - void* var_63 = tensorHalfConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); - void* var_64 = tensorHalfAdd(var_63, conv2d_15_b); - void* var_65 = tensorHalfRelu(var_64); - void* var_67 = tensorHalfConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); - void* var_68 = tensorHalfAdd(var_67, conv2d_16_b); - void* var_70 = tensorHalfConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); - void* var_71 = tensorHalfAdd(var_70, conv2d_17_b); - void* var_72 = tensorHalfAdd(var_71, var_68); - void* var_73 = tensorHalfRelu(var_72); - void* var_75 = tensorHalfConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); - void* var_76 = tensorHalfAdd(var_75, conv2d_18_b); - void* var_77 = tensorHalfRelu(var_76); - void* var_79 = tensorHalfConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); - void* var_80 = tensorHalfAdd(var_79, conv2d_19_b); - void* var_81 = tensorHalfAdd(var_73, 
var_80); - void* var_82 = tensorHalfRelu(var_81); - void* var_84 = tensorHalfConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); - void* var_85 = tensorHalfAdd(var_84, conv2d_20_b); - void* var_86 = tensorHalfRelu(var_85); - void* var_88 = tensorHalfConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); - void* var_89 = tensorHalfAdd(var_88, conv2d_21_b); - void* var_90 = tensorHalfAdd(var_82, var_89); - void* var_91 = tensorHalfRelu(var_90); - void* var_92 = tensorHalfPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorHalfGemmGPU(var_92, dense_1_w); - void* var_95 = tensorHalfAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - stopProfiling(); - - profiler.stop_profiler(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc deleted file mode 100644 index b778b1720c8a2db2f90230c3e57d0e0928f8665b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc +++ /dev/null @@ -1,182 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include 
<sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string 
conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = 
readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - double total_time = 0.0; - - for (int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorHalfAdd(var_4, conv2d_2_b); - void* var_6 = tensorHalfRelu(var_5); 
- void* var_7 = tensorHalfPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorHalfConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorHalfAdd(var_8, conv2d_3_b); - void* var_10 = tensorHalfRelu(var_9); - void* var_12 = tensorHalfConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorHalfAdd(var_12, conv2d_4_b); - void* var_14 = tensorHalfRelu(var_13); - void* var_15 = tensorHalfPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); - void* var_18 = tensorHalfRelu(var_17); - void* var_20 = tensorHalfConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorHalfAdd(var_20, conv2d_6_b); - void* var_22 = tensorHalfRelu(var_21); - void* var_24 = tensorHalfConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorHalfAdd(var_24, conv2d_7_b); - void* var_26 = tensorHalfRelu(var_25); - void* var_27 = tensorHalfPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorHalfConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorHalfAdd(var_28, conv2d_8_b); - void* var_30 = tensorHalfRelu(var_29); - void* var_32 = tensorHalfConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorHalfAdd(var_32, conv2d_9_b); - void* var_34 = tensorHalfRelu(var_33); - void* var_36 = tensorHalfConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorHalfAdd(var_36, conv2d_10_b); - void* var_38 = tensorHalfRelu(var_37); - void* var_39 = tensorHalfPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorHalfConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorHalfAdd(var_40, conv2d_11_b); - void* var_42 = tensorHalfRelu(var_41); - void* var_44 = tensorHalfConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorHalfAdd(var_44, conv2d_12_b); - void* var_46 = tensorHalfRelu(var_45); - void* var_48 = tensorHalfConvolution(var_46, 
conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorHalfAdd(var_48, conv2d_13_b); - void* var_50 = tensorHalfRelu(var_49); - void* var_51 = tensorHalfPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorHalfGemmGPU(var_51, dense_1_w); - void* var_55 = tensorHalfAdd(var_54, dense_1_b); - void* var_56 = tensorHalfRelu(var_55); - void* var_58 = tensorHalfGemmGPU(var_56, dense_2_w); - void* var_59 = tensorHalfAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc deleted file mode 100644 index 3f97e5dbde3b6d124888a8c74d435880097a394c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc +++ /dev/null @@ -1,189 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../../tensor_runtime/include/tensor_runtime.h" -#include "../../../include/utils.h" - -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -int main(){ - - 
llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + 
std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 
0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - - // NOTE: Starting time profiling - startProfiling(); - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - for (int itrs = 0; itrs < total_runs; itrs++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); - void* var_2 = tensorHalfRelu(var_1); - void* var_4 = tensorHalfConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorHalfAdd(var_4, conv2d_2_b); - void* var_6 = tensorHalfRelu(var_5); - void* var_7 = tensorHalfPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorHalfConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* 
var_9 = tensorHalfAdd(var_8, conv2d_3_b); - void* var_10 = tensorHalfRelu(var_9); - void* var_12 = tensorHalfConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorHalfAdd(var_12, conv2d_4_b); - void* var_14 = tensorHalfRelu(var_13); - void* var_15 = tensorHalfPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); - void* var_18 = tensorHalfRelu(var_17); - void* var_20 = tensorHalfConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorHalfAdd(var_20, conv2d_6_b); - void* var_22 = tensorHalfRelu(var_21); - void* var_24 = tensorHalfConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorHalfAdd(var_24, conv2d_7_b); - void* var_26 = tensorHalfRelu(var_25); - void* var_27 = tensorHalfPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorHalfConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorHalfAdd(var_28, conv2d_8_b); - void* var_30 = tensorHalfRelu(var_29); - void* var_32 = tensorHalfConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorHalfAdd(var_32, conv2d_9_b); - void* var_34 = tensorHalfRelu(var_33); - void* var_36 = tensorHalfConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorHalfAdd(var_36, conv2d_10_b); - void* var_38 = tensorHalfRelu(var_37); - void* var_39 = tensorHalfPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorHalfConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorHalfAdd(var_40, conv2d_11_b); - void* var_42 = tensorHalfRelu(var_41); - void* var_44 = tensorHalfConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorHalfAdd(var_44, conv2d_12_b); - void* var_46 = tensorHalfRelu(var_45); - void* var_48 = tensorHalfConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorHalfAdd(var_48, conv2d_13_b); - void* var_50 = tensorHalfRelu(var_49); - void* var_51 = 
tensorHalfPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorHalfGemmGPU(var_51, dense_1_w); - void* var_55 = tensorHalfAdd(var_54, dense_1_b); - void* var_56 = tensorHalfRelu(var_55); - void* var_58 = tensorHalfGemmGPU(var_56, dense_2_w); - void* var_59 = tensorHalfAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_60); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - profiler.stop_profiler(); - // Start power and performance profiling - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_shallow.cc deleted file mode 100644 index d30518216f76160e183a915a6e6da2018239ab60..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_shallow.cc +++ /dev/null @@ -1,240 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(int argc, char* argv[]){ - - int total_runs = 1; - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - 
std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string 
batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + 
std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* 
batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = 
readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - 
std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string 
depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + 
std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - - - for(int j = 0; j < total_runs; j++){ - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = 
tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = 
tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorGemmGPU(var_47, dense_1_w); - void* var_50 = tensorAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_piped.cc deleted file mode 100644 index 653d2e7bdf7f7d006dc89fb99027ac58bd336c45..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_piped.cc +++ /dev/null @@ -1,172 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 
3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 
0,1,64,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, -0.9998477, 0.99987465, 9); - // void* var_1 = ConvLayer_PROMISE(var_0, -0.9998477, 0.99987465, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.99997115, 1.0, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.8, 0.8, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.99997115, 1.0, 9); - - void* var_2 = 
ConvLayer_PROMISE(var_1, -0.99997115, 1.0, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9999999, 1.0, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999999, 1.0, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9999999, 0.99999934, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999999, 0.99999934, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -1.0, 1.0, 9); - void* var_5 = ConvLayer_PROMISE(var_4, -1.0, 1.0, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.9999941, 0.9999964, 9); - void* var_6 = FCLayer_PROMISE(var_5, -0.9999941, 0.9999964, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -140.6419, 16.402884, 9); - void* var_7 = tensorSoftmax(var_6); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_7); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_7, classConfs, predictedLabels, relative_start, relative_end); - } - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc deleted file mode 100644 index ab3a20dfafbd636b03e2f3496eb6d016cd57a394..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc +++ /dev/null @@ -1,167 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - 
std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 
0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, -0.9998477, 0.99987465, 9); - // void* var_1 = ConvLayer_PROMISE(var_0, -0.9998477, 0.99987465, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.99997115, 1.0, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.8, 0.8, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.99997115, 1.0, 9); - - void* var_2 = ConvLayer_PROMISE(var_1, -0.99997115, 1.0, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9999999, 1.0, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999999, 1.0, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9999999, 0.99999934, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999999, 0.99999934, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -1.0, 1.0, 9); - void* var_5 = ConvLayer_PROMISE(var_4, -1.0, 1.0, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.9999941, 0.9999964, 9); - void* var_6 = FCLayer_PROMISE(var_5, -0.9999941, 0.9999964, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -140.6419, 16.402884, 9); - void* var_7 = tensorSoftmax(var_6); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_7); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_7, classConfs, predictedLabels, relative_start, relative_end); - } - - 
freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_piped.cc deleted file mode 100644 index da2331c9654cedc49241d1cf573fdb4886469180..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_piped.cc +++ /dev/null @@ -1,106 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 200; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("/shared/hsharif3/alexnet_imagenet_tune/"); - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - 
void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,9216,4096); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); - std::string dense_3_w_path = dir_prefix + 
std::string("dense_3_w.bin"); - void* dense_3_w = readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); - std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); - void* dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - void* var_0 = ConvLayer_PROMISE2(input, 0.0, 255.0, conv2d_1_w, -0.5503702693581581, 0.5811487324237921, conv2d_1_b, -2.802485, 1.648145, 2, 2, 4, 4, 0, 3, 2, 1, 0.0, 1572.3096923828125, 9); - void* var_1 = ConvLayer_PROMISE2(var_0, 0.0, 1572.3096923828125, conv2d_2_w, -0.2867645202279091, 0.26272463005783797, conv2d_2_b, -0.47985682, 0.501206, 2, 2, 1, 1, 0, 3, 2, 1, 0.0, 3183.7813264160477, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 3183.7813264160477, conv2d_3_w, -0.16606662392616273, 0.15785247704386754, conv2d_3_b, -0.42038992, 0.5545839, 1, 1, 1, 1, -1, 0, 1, 0.0, 1765.4451872558668, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1765.4451872558668, conv2d_4_w, -0.10464580833911895, 0.11035470351576919, conv2d_4_b, -1.4275751, 0.9042998, 1, 1, 1, 1, -1, 0, 1, 0.0, 1345.5418548586083, 9); - void* var_4 = ConvLayer_PROMISE2(var_3, 0.0, 1345.5418548586083, conv2d_5_w, -0.09240880391001702, 0.10250756608694818, conv2d_5_b, -0.45662758, 2.4040315, 1, 1, 1, 1, 0, 3, 2, 1, 0.0, 1227.3563232421875, 9); - void* var_5 = FCLayer_PROMISE(var_4, 0.0, 1227.3563232421875, dense_1_w, -0.030517672039568428, 0.02963459612801672, dense_1_b, -0.07124679, 0.09377053, 1, 0.0, 1034.5966391601676, 9); - void* var_6 = FCLayer_PROMISE(var_5, 0.0, 1034.5966391601676, dense_2_w, -0.038392101023346184, 0.039147199764847845, dense_2_b, -0.050027702, 0.1841282, 1, 0.0, 839.0697069702154, 9); - void* var_7 = FCLayer_PROMISE(var_6, 0.0, 839.0697069702154, dense_3_w, -0.05494491942599416, 0.08549865524470925, dense_3_b, -0.16314922, 0.15416704, -1, -608.3993963623047, 
1082.8444653320819, 9); - void* var_8 = tensorSoftmax(var_7); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_8); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_promise.cc deleted file mode 100644 index c848c8614d4ea68fd9fbeb5ab8fd072f4aa15b19..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_imagenet_promise.cc +++ /dev/null @@ -1,102 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 200; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("/shared/hsharif3/alexnet_imagenet_tune/"); - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + 
std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,9216,4096); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); - std::string dense_3_w_path = dir_prefix + std::string("dense_3_w.bin"); - void* dense_3_w = readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); - std::string dense_3_b_path 
= dir_prefix + std::string("dense_3_b.bin"); - void* dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - void* var_0 = ConvLayer_PROMISE2(input, 0.0, 255.0, conv2d_1_w, -0.5503702693581581, 0.5811487324237921, conv2d_1_b, -2.802485, 1.648145, 2, 2, 4, 4, 0, 3, 2, 1, 0.0, 1572.3096923828125, 9); - void* var_1 = ConvLayer_PROMISE2(var_0, 0.0, 1572.3096923828125, conv2d_2_w, -0.2867645202279091, 0.26272463005783797, conv2d_2_b, -0.47985682, 0.501206, 2, 2, 1, 1, 0, 3, 2, 1, 0.0, 3183.7813264160477, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 3183.7813264160477, conv2d_3_w, -0.16606662392616273, 0.15785247704386754, conv2d_3_b, -0.42038992, 0.5545839, 1, 1, 1, 1, -1, 0, 1, 0.0, 1765.4451872558668, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1765.4451872558668, conv2d_4_w, -0.10464580833911895, 0.11035470351576919, conv2d_4_b, -1.4275751, 0.9042998, 1, 1, 1, 1, -1, 0, 1, 0.0, 1345.5418548586083, 9); - void* var_4 = ConvLayer_PROMISE2(var_3, 0.0, 1345.5418548586083, conv2d_5_w, -0.09240880391001702, 0.10250756608694818, conv2d_5_b, -0.45662758, 2.4040315, 1, 1, 1, 1, 0, 3, 2, 1, 0.0, 1227.3563232421875, 9); - void* var_5 = FCLayer_PROMISE(var_4, 0.0, 1227.3563232421875, dense_1_w, -0.030517672039568428, 0.02963459612801672, dense_1_b, -0.07124679, 0.09377053, 1, 0.0, 1034.5966391601676, 9); - void* var_6 = FCLayer_PROMISE(var_5, 0.0, 1034.5966391601676, dense_2_w, -0.038392101023346184, 0.039147199764847845, dense_2_b, -0.050027702, 0.1841282, 1, 0.0, 839.0697069702154, 9); - void* var_7 = FCLayer_PROMISE(var_6, 0.0, 839.0697069702154, dense_3_w, -0.05494491942599416, 0.08549865524470925, dense_3_b, -0.16314922, 0.15416704, -1, -608.3993963623047, 1082.8444653320819, 9); - void* var_8 = tensorSoftmax(var_7); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float 
accuracy = computeAccuracy3(labels, var_8); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_piped.cc deleted file mode 100644 index 22fe979cb5bbae5964ff33444ab0fbe9dec82cf1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_piped.cc +++ /dev/null @@ -1,167 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); - //std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + 
std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - 
stallOnOpenTunerSignal(); - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 0.9999993443489075, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); - void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); - void* var_6 = tensorSoftmax(var_5); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_6); - final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - 
copyClassConfsAndLabels(var_6, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); - - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc deleted file mode 100644 index c67eb1153e6c29a0f478e495be2d36dbdafe1d56..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc +++ /dev/null @@ -1,160 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = 
(int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = 
readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 0.9999993443489075, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); - void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); - void* var_6 = 
tensorSoftmax(var_5); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_6); - final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_6, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - dumpExecutionAccuracies(); - - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_piped.cc deleted file mode 100644 index c246822a094faffebe01f58fa8fd2c15f004cea1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_piped.cc +++ /dev/null @@ -1,175 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - -int test_input_size = 5000; -int batch_size = 5000; -int offset = 5000; - - -bool shouldDumpClassConf = false; -float* classConfs; -int* predictedLabels; - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - printf("********* Lenet-5 Architecture ********** \n"); - - std::string dir_prefix = std::string("../model_params/lenet_mnist/"); - std::string input_path = dir_prefix + 
std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - - // Loading Weights - void* conv1_filter = readTrainedWeights("../model_params/lenet_mnist/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_mnist/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_mnist/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_mnist/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_mnist/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_mnist/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_mnist/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_mnist/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - clearTensorMap(); - - int missed = 0; - for(int i = 0; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - - if (missed >= to_skip){ - break; - } - - - int start = offset; - int end = batch_size + offset; - - // Loading Input Batch - void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); - - // Loading Weights - - // DNN Operations - void* conv1_out = ConvLayer_PROMISE(input, 0,1, conv1_filter, -1,1, conv1_bias, -1,1, - 2, 2, 1, 1, 0, 2, 0, -1,1, 9); - void* conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -1,1, - conv2_bias, -1,1, - 2, 2, 1, 1, 0, 2, 0, -1,1, 9); - - void* fc1_out = FCLayer_PROMISE(conv2_out, -1,1, fc1_weights, -1,1, fc1_bias, -1,1, - 0, -1,1, 9); - void* fc2_out = FCLayer_PROMISE(fc1_out, -1,1, fc2_weights, -1,1, fc2_bias, -1,1, - 0, -1,1, 9); - - void* result = tensorSoftmax(fc2_out); - - - uint8_t* 
labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(result, classConfs, predictedLabels, relative_start, relative_end); - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - freeOutputTensors(); - - dumpFinalAccuracy(accuracy); - - if (accuracy < bench_acc) - missed += 1; - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - - } - - dumpExecutionAccuracies(); -} - - -int main(int argc, char* argv[]){ - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - batch_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(0); - - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_promise.cc deleted file mode 100644 index e1428589c3f48a5879b9f5c9c73980e4d3ca9ff0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/lenet_promise.cc +++ /dev/null @@ -1,167 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - 
-#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - -int test_input_size = 5000; -int batch_size = 5000; -int offset = 5000; - - -bool shouldDumpClassConf = false; -float* classConfs; -int* predictedLabels; - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - printf("********* Lenet-5 Architecture ********** \n"); - - std::string dir_prefix = std::string("../model_params/lenet_mnist/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - - int missed = 0; - for(int i = 0; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - int start = offset; - int end = batch_size + offset; - - - startMemTracking(); - - // Loading Input Batch - void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); - - // Loading Weights - void* conv1_filter = readTrainedWeights("../model_params/lenet_mnist/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_mnist/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_mnist/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_mnist/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_mnist/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_mnist/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_mnist/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_mnist/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - // DNN Operations - void* conv1_out = ConvLayer_PROMISE(input, 0,1, conv1_filter, -1,1, 
conv1_bias, -1,1, - 2, 2, 1, 1, 0, 2, 0, -1,1, 9); - void* conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -1,1, - conv2_bias, -1,1, - 2, 2, 1, 1, 0, 2, 0, -1,1, 9); - - void* fc1_out = FCLayer_PROMISE(conv2_out, -1,1, fc1_weights, -1,1, fc1_bias, -1,1, - 0, -1,1, 9); - void* fc2_out = FCLayer_PROMISE(fc1_out, -1,1, fc2_weights, -1,1, fc2_bias, -1,1, - 0, -1,1, 9); - - void* result = tensorSoftmax(fc2_out); - - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(result, classConfs, predictedLabels, relative_start, relative_end); - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - freeBatchMemory(); - - dumpFinalAccuracy(accuracy); - - - if (accuracy < bench_acc) - missed += 1; - - } - - dumpExecutionAccuracies(); -} - - -int main(int argc, char* argv[]){ - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - batch_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(1); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - - - - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_piped.cc 
deleted file mode 100644 index 8444e512f5f36de261065653be8a2bdf44885d5d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_piped.cc +++ /dev/null @@ -1,492 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* 
batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = 
readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string 
batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string 
conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + 
std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* 
batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = 
readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = 
readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = 
readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = 
readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = 
readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = 
readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = 
readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = 
readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* 
batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = 
ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorHalfBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); - void* var_7 = tensorHalfBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorHalfBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); - void* var_13 = tensorHalfBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorHalfBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, 
batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); - void* var_25 = tensorHalfBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorHalfBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); - void* var_31 = tensorHalfBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = 
tensorHalfBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_40 = tensorHalfBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_41 = tensorRelu(var_40); - void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); - void* var_43 = tensorHalfBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_44 = tensorRelu(var_43); - void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_46 = tensorHalfBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_47 = tensorRelu(var_46); - void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); - void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* 
var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); - void* var_55 = tensorHalfBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_58 = tensorHalfBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); - void* var_61 = tensorHalfBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_62 = tensorRelu(var_61); - void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); - void* var_67 = tensorHalfBatchNorm(var_66, 
batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_70 = tensorHalfBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_71 = tensorHalfRelu(var_70); - void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); - void* var_73 = tensorHalfBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_74 = tensorHalfRelu(var_73); - void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_76 = tensorHalfBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_77 = tensorRelu(var_76); - void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); - void* var_79 = tensorHalfBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_80 = tensorHalfRelu(var_79); - void* var_81 = tensorHalfPooling(var_80,1,2,2,0,0,2,2); - void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); - void* var_83 = tensorSoftmax(var_82); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = 
computeAccuracy2(labels, batch_size, var_83); - final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_83, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc deleted file mode 100644 index 697cc5f1412ac012c344abbd5a25a8a79a2f1acd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc +++ /dev/null @@ -1,487 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf 
= false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(0); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - std::string dir_prefix = std::string("../model_params/mobilenet/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = 
readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 
0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + 
std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* 
batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = 
readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = 
readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = 
readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = 
readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = 
readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = 
readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = 
readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = 
readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = 
readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = 
readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = 
readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); - void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorHalfBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); - void* var_7 = tensorHalfBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, 
batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorHalfBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); - void* var_13 = tensorHalfBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorHalfBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); - void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); - void* var_25 
= tensorHalfBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorHalfBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); - void* var_31 = tensorHalfBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); - void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_40 = tensorHalfBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_41 = tensorRelu(var_40); - void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, 
-0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); - void* var_43 = tensorHalfBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_44 = tensorRelu(var_43); - void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_46 = tensorHalfBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_47 = tensorRelu(var_46); - void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); - void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); - void* var_55 = tensorHalfBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_58 = tensorHalfBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, 
batch_normalization_20_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); - void* var_61 = tensorHalfBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_62 = tensorRelu(var_61); - void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); - void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_70 = tensorHalfBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_71 = tensorHalfRelu(var_70); - void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); - void* var_73 = tensorHalfBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_74 = tensorHalfRelu(var_73); - void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 
1, 1024); - void* var_76 = tensorHalfBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_77 = tensorRelu(var_76); - void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); - void* var_79 = tensorHalfBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_80 = tensorHalfRelu(var_79); - void* var_81 = tensorHalfPooling(var_80,1,2,2,0,0,2,2); - void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); - void* var_83 = tensorSoftmax(var_82); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_83); - final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_83, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_piped.cc 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_piped.cc deleted file mode 100644 index 3dffdffcf16fdcf7071e3957cf7dc496fa0c3c50..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_piped.cc +++ /dev/null @@ -1,313 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string 
batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + 
std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* 
conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = 
readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - 
std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + 
std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + 
std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - 
- - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.5164621164798737, 1.6472081774473288, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -9.868980642318725, 10.560956018447879, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.821381127357554, conv2d_2_w, -1.1834390873908995, 1.2731596627235617, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -9.875998497009277, 7.51305247974393, 9); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.826067455768602, conv2d_3_w, -0.599876856982708, 0.6812073457241064, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, 
-5.633289833068848, 5.177892235755925, 9); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.02646304416659, conv2d_4_w, -0.4555967862010002, 0.4942613914608956, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.316803941726685, 4.605850250244146, 9); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 4.532649063110355, conv2d_5_w, -0.35657615590095515, 0.3382165088057521, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.1012511816024775, 4.3630500688553, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.9874704387188977, conv2d_6_w, 
-0.28502783328294756, 0.28604640334844594, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.243851703643799, 3.486250406742097, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 6.563065901756522, conv2d_7_w, -0.18946402323246003, 0.19012390717864017, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.938115713119507, 3.538363476753238, 9); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,1,2,2,0,0,2,2); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 1.8908388000727185, dense_1_w, -0.35140394401550296, 0.422872786462307, dense_1_b, -0.23878151, 0.26507422, -1, -14.630816223144532, 27.27252123260504, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = 
readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc deleted file mode 100644 index 757f950249566ce658a8e7e7289cc64ab034b396..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc +++ /dev/null @@ -1,304 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(0); - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - std::string dir_prefix = 
std::string("../model_params/mobilenet_shallow/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string 
batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + 
std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* 
batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = 
readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = 
readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = 
readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = 
readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.5164621164798737, 1.6472081774473288, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -9.868980642318725, 10.560956018447879, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.821381127357554, conv2d_2_w, -1.1834390873908995, 1.2731596627235617, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -9.875998497009277, 7.51305247974393, 9); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, 
batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.826067455768602, conv2d_3_w, -0.599876856982708, 0.6812073457241064, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.633289833068848, 5.177892235755925, 9); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.02646304416659, conv2d_4_w, -0.4555967862010002, 0.4942613914608956, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.316803941726685, 4.605850250244146, 9); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 4.532649063110355, conv2d_5_w, -0.35657615590095515, 0.3382165088057521, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.1012511816024775, 4.3630500688553, 9); - void* var_25 = tensorBatchNorm(var_24, 
batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.9874704387188977, conv2d_6_w, -0.28502783328294756, 0.28604640334844594, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.243851703643799, 3.486250406742097, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 6.563065901756522, conv2d_7_w, -0.18946402323246003, 0.19012390717864017, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.938115713119507, 3.538363476753238, 9); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,1,2,2,0,0,2,2); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 1.8908388000727185, dense_1_w, -0.35140394401550296, 0.422872786462307, dense_1_b, -0.23878151, 0.26507422, -1, -14.630816223144532, 27.27252123260504, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - 
final_accuracy += accuracy; - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_piped.cc deleted file mode 100644 index 8446e1f2583c99758f9fdd84c71dd0c8d31cd182..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_piped.cc +++ /dev/null @@ -1,265 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 250; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = 
(float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string 
conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = 
readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = 
dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -1.0248864, 1.2929907, conv2d_1_b, -0.36291853, 0.2533059, 
1, 1, 1, 1, -1, 0, 1, 0.0, 0.8791630274057383, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 0.8791630274057383, conv2d_2_w, -0.69884616, 0.71849966, conv2d_2_b, -0.2781147, 0.45571187, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1859495645761484, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1.1859495645761484, conv2d_3_w, -0.59568167, 0.7714691, conv2d_3_b, -0.8602873, 0.19743633, 1, 1, 1, 1, -1, 0, -1, -2.2316832554340365, 2.266301159858699, 9); - void* var_3 = tensorAdd(var_0, var_2); - void* var_4 = tensorRelu(var_3); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.789569139480591, conv2d_4_w, -0.41976976, 0.43748936, conv2d_4_b, -0.7021962, 0.3033103, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3341254055499974, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 1.3341254055499974, conv2d_5_w, -0.46757826, 0.4635873, conv2d_5_b, -0.20662616, 0.1778044, 1, 1, 1, 1, -1, 0, -1, -0.9912706619501114, 1.0245310074090952, 9); - void* var_7 = tensorAdd(var_4, var_6); - void* var_8 = tensorRelu(var_7); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 2.998989346027372, conv2d_6_w, -0.64404047, 0.45383143, conv2d_6_b, -0.819547, 0.38550296, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2850778144597967, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1.2850778144597967, conv2d_7_w, -0.41986948, 0.33654243, conv2d_7_b, -0.3563013, 0.22371122, 1, 1, 1, 1, -1, 0, -1, -1.2940701305866242, 0.7332147359848022, 9); - void* var_11 = tensorAdd(var_8, var_10); - void* var_12 = tensorRelu(var_11); - void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_8_w, -0.4805263, 0.50655717, conv2d_8_b, -0.296758, 0.7742441, 1, 1, 2, 2, -1, 0, 1, 0.0, 3.6232483506202584, 9); - void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 3.6232483506202584, conv2d_9_w, -0.52083415, 0.45517674, conv2d_9_b, -0.20242067, 0.8236838, 1, 1, 1, 1, -1, 0, -1, -6.319877154827118, 6.882811555862418, 9); - void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_10_w, -0.5338656, 1.3395424, conv2d_10_b, -0.20242067, 
0.8236838, 0, 0, 2, 2, -1, 0, -1, -0.9930689406394959, 2.8721754658222096, 9); - void* var_16 = tensorAdd(var_15, var_14); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 8.315858840942383, conv2d_11_w, -0.34429058, 0.43629733, conv2d_11_b, -1.0744808, 0.056708273, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.6893706333637226, 9); - void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 2.6893706333637226, conv2d_12_w, -0.30342352, 0.39493486, conv2d_12_b, -0.44630566, 0.6492069, 1, 1, 1, 1, -1, 0, -1, -1.8801953810453416, 1.714934362173068, 9); - void* var_20 = tensorAdd(var_17, var_19); - void* var_21 = tensorRelu(var_20); - void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 8.381670951843262, conv2d_13_w, -0.38351893, 0.45775774, conv2d_13_b, -1.4733055, -0.014426912, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.569231034517287, 9); - void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 2.569231034517287, conv2d_14_w, -0.25695276, 0.45372736, conv2d_14_b, -0.5259744, 0.26591402, 1, 1, 1, 1, -1, 0, -1, -1.9701244848966597, 1.4661400413513093, 9); - void* var_24 = tensorAdd(var_21, var_23); - void* var_25 = tensorRelu(var_24); - void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_15_w, -0.55299705, 0.5443531, conv2d_15_b, -0.71790683, 1.2730768, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.411911067962677, 9); - void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 12.411911067962677, conv2d_16_w, -0.4203967, 0.48641303, conv2d_16_b, -0.90653443, 1.3546854, 1, 1, 1, 1, -1, 0, -1, -25.407194147109987, 20.519153985977383, 9); - void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_17_w, -0.4365755, 0.84913826, conv2d_17_b, -0.90653443, 1.3546851, 0, 0, 2, 2, -1, 0, -1, -4.256520752906799, 5.730506427288059, 9); - void* var_29 = tensorAdd(var_28, var_27); - void* var_30 = tensorRelu(var_29); - void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 22.350475664138983, conv2d_18_w, -0.38657624, 0.5228989, conv2d_18_b, -1.2083547, 0.76361173, 1, 1, 1, 1, -1, 0, 1, 0.0, 
23.93387042045599, 9); - void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 23.93387042045599, conv2d_19_w, -0.40857902, 0.575035, conv2d_19_b, -1.8731614, 1.0960501, 1, 1, 1, 1, -1, 0, -1, -35.37134181976318, 19.209569931030273, 9); - void* var_33 = tensorAdd(var_30, var_32); - void* var_34 = tensorRelu(var_33); - void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 29.434949998855657, conv2d_20_w, -0.33079496, 0.5893278, conv2d_20_b, -1.0234511, 1.0016295, 1, 1, 1, 1, -1, 0, 1, 0.0, 27.216757345199866, 9); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 27.216757345199866, conv2d_21_w, -0.27897888, 0.38280907, conv2d_21_b, -2.2086356, 1.0066502, 1, 1, 1, 1, -1, 0, -1, -42.31447326660156, 29.365212144852038, 9); - void* var_37 = tensorAdd(var_34, var_36); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 13.736315393447876, dense_1_w, -1.5092047, 1.0279838, dense_1_b, -0.49379802, 0.61032647, -1, -45.52749088287353, 31.64324799537669, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); 
- - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc deleted file mode 100644 index 2ade8b6090d69d733399a399619442cede2bfde9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc +++ /dev/null @@ -1,259 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string 
conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - 
std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = 
readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = 
dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -1.0248864, 1.2929907, conv2d_1_b, -0.36291853, 0.2533059, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.8791630274057383, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 0.8791630274057383, conv2d_2_w, -0.69884616, 0.71849966, conv2d_2_b, -0.2781147, 0.45571187, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1859495645761484, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1.1859495645761484, conv2d_3_w, -0.59568167, 0.7714691, conv2d_3_b, -0.8602873, 0.19743633, 1, 1, 1, 1, -1, 0, -1, -2.2316832554340365, 2.266301159858699, 9); - void* var_3 = tensorAdd(var_0, var_2); - void* var_4 = tensorRelu(var_3); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.789569139480591, conv2d_4_w, -0.41976976, 0.43748936, conv2d_4_b, -0.7021962, 0.3033103, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3341254055499974, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 1.3341254055499974, conv2d_5_w, -0.46757826, 
0.4635873, conv2d_5_b, -0.20662616, 0.1778044, 1, 1, 1, 1, -1, 0, -1, -0.9912706619501114, 1.0245310074090952, 9); - void* var_7 = tensorAdd(var_4, var_6); - void* var_8 = tensorRelu(var_7); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 2.998989346027372, conv2d_6_w, -0.64404047, 0.45383143, conv2d_6_b, -0.819547, 0.38550296, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2850778144597967, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1.2850778144597967, conv2d_7_w, -0.41986948, 0.33654243, conv2d_7_b, -0.3563013, 0.22371122, 1, 1, 1, 1, -1, 0, -1, -1.2940701305866242, 0.7332147359848022, 9); - void* var_11 = tensorAdd(var_8, var_10); - void* var_12 = tensorRelu(var_11); - void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_8_w, -0.4805263, 0.50655717, conv2d_8_b, -0.296758, 0.7742441, 1, 1, 2, 2, -1, 0, 1, 0.0, 3.6232483506202584, 9); - void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 3.6232483506202584, conv2d_9_w, -0.52083415, 0.45517674, conv2d_9_b, -0.20242067, 0.8236838, 1, 1, 1, 1, -1, 0, -1, -6.319877154827118, 6.882811555862418, 9); - void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_10_w, -0.5338656, 1.3395424, conv2d_10_b, -0.20242067, 0.8236838, 0, 0, 2, 2, -1, 0, -1, -0.9930689406394959, 2.8721754658222096, 9); - void* var_16 = tensorAdd(var_15, var_14); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 8.315858840942383, conv2d_11_w, -0.34429058, 0.43629733, conv2d_11_b, -1.0744808, 0.056708273, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.6893706333637226, 9); - void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 2.6893706333637226, conv2d_12_w, -0.30342352, 0.39493486, conv2d_12_b, -0.44630566, 0.6492069, 1, 1, 1, 1, -1, 0, -1, -1.8801953810453416, 1.714934362173068, 9); - void* var_20 = tensorAdd(var_17, var_19); - void* var_21 = tensorRelu(var_20); - void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 8.381670951843262, conv2d_13_w, -0.38351893, 0.45775774, conv2d_13_b, -1.4733055, -0.014426912, 1, 1, 1, 1, -1, 
0, 1, 0.0, 2.569231034517287, 9); - void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 2.569231034517287, conv2d_14_w, -0.25695276, 0.45372736, conv2d_14_b, -0.5259744, 0.26591402, 1, 1, 1, 1, -1, 0, -1, -1.9701244848966597, 1.4661400413513093, 9); - void* var_24 = tensorAdd(var_21, var_23); - void* var_25 = tensorRelu(var_24); - void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_15_w, -0.55299705, 0.5443531, conv2d_15_b, -0.71790683, 1.2730768, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.411911067962677, 9); - void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 12.411911067962677, conv2d_16_w, -0.4203967, 0.48641303, conv2d_16_b, -0.90653443, 1.3546854, 1, 1, 1, 1, -1, 0, -1, -25.407194147109987, 20.519153985977383, 9); - void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_17_w, -0.4365755, 0.84913826, conv2d_17_b, -0.90653443, 1.3546851, 0, 0, 2, 2, -1, 0, -1, -4.256520752906799, 5.730506427288059, 9); - void* var_29 = tensorAdd(var_28, var_27); - void* var_30 = tensorRelu(var_29); - void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 22.350475664138983, conv2d_18_w, -0.38657624, 0.5228989, conv2d_18_b, -1.2083547, 0.76361173, 1, 1, 1, 1, -1, 0, 1, 0.0, 23.93387042045599, 9); - void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 23.93387042045599, conv2d_19_w, -0.40857902, 0.575035, conv2d_19_b, -1.8731614, 1.0960501, 1, 1, 1, 1, -1, 0, -1, -35.37134181976318, 19.209569931030273, 9); - void* var_33 = tensorAdd(var_30, var_32); - void* var_34 = tensorRelu(var_33); - void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 29.434949998855657, conv2d_20_w, -0.33079496, 0.5893278, conv2d_20_b, -1.0234511, 1.0016295, 1, 1, 1, 1, -1, 0, 1, 0.0, 27.216757345199866, 9); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 27.216757345199866, conv2d_21_w, -0.27897888, 0.38280907, conv2d_21_b, -2.2086356, 1.0066502, 1, 1, 1, 1, -1, 0, -1, -42.31447326660156, 29.365212144852038, 9); - void* var_37 = tensorAdd(var_34, var_36); - void* var_38 = tensorRelu(var_37); - void* var_39 = 
tensorPooling(var_38,1,8,8,0,0,8,8); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 13.736315393447876, dense_1_w, -1.5092047, 1.0279838, dense_1_b, -0.49379802, 0.61032647, -1, -45.52749088287353, 31.64324799537669, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_piped.cc deleted file mode 100644 index 1e61f9e993e0de5678c203b4e09d570c15f4d63c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_piped.cc +++ /dev/null @@ -1,925 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - - int total_runs = 1; - int offset = 0; - - int test_input_size = 2000; - 
int batch_size = 50; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - - - - std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,7,7); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 
0,64,64,1,1); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = 
readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = 
readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_7_w_path = dir_prefix + 
std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + 
std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_gamma_path = dir_prefix + 
std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = 
readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,128,256,1,1); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* 
batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,512,256,1,1); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - 
void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = 
readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* 
batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = 
readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* 
batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_22_w_path = dir_prefix + std::string("conv2d_22_w.bin"); - void* conv2d_22_w = readTrainedWeights(conv2d_22_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_22_b_path = dir_prefix + std::string("conv2d_22_b.bin"); - void* conv2d_22_b = readTrainedWeights(conv2d_22_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_23_w_path = dir_prefix + std::string("conv2d_23_w.bin"); - void* conv2d_23_w = readTrainedWeights(conv2d_23_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_23_b_path = 
dir_prefix + std::string("conv2d_23_b.bin"); - void* conv2d_23_b = readTrainedWeights(conv2d_23_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_24_w_path = dir_prefix + std::string("conv2d_24_w.bin"); - void* conv2d_24_w = readTrainedWeights(conv2d_24_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_24_b_path = dir_prefix + std::string("conv2d_24_b.bin"); - void* conv2d_24_b = readTrainedWeights(conv2d_24_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = 
readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_25_w_path = dir_prefix + std::string("conv2d_25_w.bin"); - void* conv2d_25_w = readTrainedWeights(conv2d_25_w_path.c_str(), 0,256,512,1,1); - std::string conv2d_25_b_path = dir_prefix + std::string("conv2d_25_b.bin"); - void* conv2d_25_b = readTrainedWeights(conv2d_25_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_26_w_path = dir_prefix + std::string("conv2d_26_w.bin"); - void* conv2d_26_w = readTrainedWeights(conv2d_26_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_26_b_path = dir_prefix + std::string("conv2d_26_b.bin"); - void* conv2d_26_b = readTrainedWeights(conv2d_26_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* 
batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_27_w_path = dir_prefix + std::string("conv2d_27_w.bin"); - void* conv2d_27_w = readTrainedWeights(conv2d_27_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_27_b_path = dir_prefix + std::string("conv2d_27_b.bin"); - void* conv2d_27_b = readTrainedWeights(conv2d_27_b_path.c_str(), 0,1,1024,1,1); - std::string conv2d_28_w_path = dir_prefix + std::string("conv2d_28_w.bin"); - void* conv2d_28_w = readTrainedWeights(conv2d_28_w_path.c_str(), 0,1024,512,1,1); - std::string conv2d_28_b_path = dir_prefix + std::string("conv2d_28_b.bin"); - void* conv2d_28_b = readTrainedWeights(conv2d_28_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - 
void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_gamma_path = dir_prefix + std::string("batch_normalization_28_gamma.bin"); - void* batch_normalization_28_gamma = readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_beta_path = dir_prefix + std::string("batch_normalization_28_beta.bin"); - void* batch_normalization_28_beta = readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_mean_path = dir_prefix + std::string("batch_normalization_28_mean.bin"); - void* batch_normalization_28_mean = readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_variance_path = dir_prefix + std::string("batch_normalization_28_variance.bin"); - void* batch_normalization_28_variance = readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_29_w_path = dir_prefix + std::string("conv2d_29_w.bin"); - void* conv2d_29_w = readTrainedWeights(conv2d_29_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_29_b_path = dir_prefix + std::string("conv2d_29_b.bin"); - void* conv2d_29_b = readTrainedWeights(conv2d_29_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_gamma_path = dir_prefix + std::string("batch_normalization_29_gamma.bin"); - void* batch_normalization_29_gamma = readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_beta_path = dir_prefix + std::string("batch_normalization_29_beta.bin"); - void* batch_normalization_29_beta = 
readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_mean_path = dir_prefix + std::string("batch_normalization_29_mean.bin"); - void* batch_normalization_29_mean = readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_variance_path = dir_prefix + std::string("batch_normalization_29_variance.bin"); - void* batch_normalization_29_variance = readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_30_w_path = dir_prefix + std::string("conv2d_30_w.bin"); - void* conv2d_30_w = readTrainedWeights(conv2d_30_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_30_b_path = dir_prefix + std::string("conv2d_30_b.bin"); - void* conv2d_30_b = readTrainedWeights(conv2d_30_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_gamma_path = dir_prefix + std::string("batch_normalization_30_gamma.bin"); - void* batch_normalization_30_gamma = readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_beta_path = dir_prefix + std::string("batch_normalization_30_beta.bin"); - void* batch_normalization_30_beta = readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_mean_path = dir_prefix + std::string("batch_normalization_30_mean.bin"); - void* batch_normalization_30_mean = readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_variance_path = dir_prefix + std::string("batch_normalization_30_variance.bin"); - void* batch_normalization_30_variance = readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_31_w_path = dir_prefix + std::string("conv2d_31_w.bin"); - void* conv2d_31_w = readTrainedWeights(conv2d_31_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_31_b_path = dir_prefix + 
std::string("conv2d_31_b.bin"); - void* conv2d_31_b = readTrainedWeights(conv2d_31_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_gamma_path = dir_prefix + std::string("batch_normalization_31_gamma.bin"); - void* batch_normalization_31_gamma = readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_beta_path = dir_prefix + std::string("batch_normalization_31_beta.bin"); - void* batch_normalization_31_beta = readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_mean_path = dir_prefix + std::string("batch_normalization_31_mean.bin"); - void* batch_normalization_31_mean = readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_variance_path = dir_prefix + std::string("batch_normalization_31_variance.bin"); - void* batch_normalization_31_variance = readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_32_w_path = dir_prefix + std::string("conv2d_32_w.bin"); - void* conv2d_32_w = readTrainedWeights(conv2d_32_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_32_b_path = dir_prefix + std::string("conv2d_32_b.bin"); - void* conv2d_32_b = readTrainedWeights(conv2d_32_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_gamma_path = dir_prefix + std::string("batch_normalization_32_gamma.bin"); - void* batch_normalization_32_gamma = readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_beta_path = dir_prefix + std::string("batch_normalization_32_beta.bin"); - void* batch_normalization_32_beta = readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_mean_path = dir_prefix + std::string("batch_normalization_32_mean.bin"); - void* batch_normalization_32_mean = 
readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_variance_path = dir_prefix + std::string("batch_normalization_32_variance.bin"); - void* batch_normalization_32_variance = readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_33_w_path = dir_prefix + std::string("conv2d_33_w.bin"); - void* conv2d_33_w = readTrainedWeights(conv2d_33_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_33_b_path = dir_prefix + std::string("conv2d_33_b.bin"); - void* conv2d_33_b = readTrainedWeights(conv2d_33_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_gamma_path = dir_prefix + std::string("batch_normalization_33_gamma.bin"); - void* batch_normalization_33_gamma = readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_beta_path = dir_prefix + std::string("batch_normalization_33_beta.bin"); - void* batch_normalization_33_beta = readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_mean_path = dir_prefix + std::string("batch_normalization_33_mean.bin"); - void* batch_normalization_33_mean = readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_variance_path = dir_prefix + std::string("batch_normalization_33_variance.bin"); - void* batch_normalization_33_variance = readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_34_w_path = dir_prefix + std::string("conv2d_34_w.bin"); - void* conv2d_34_w = readTrainedWeights(conv2d_34_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_34_b_path = dir_prefix + std::string("conv2d_34_b.bin"); - void* conv2d_34_b = readTrainedWeights(conv2d_34_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_gamma_path = dir_prefix + std::string("batch_normalization_34_gamma.bin"); - void* 
batch_normalization_34_gamma = readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_beta_path = dir_prefix + std::string("batch_normalization_34_beta.bin"); - void* batch_normalization_34_beta = readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_mean_path = dir_prefix + std::string("batch_normalization_34_mean.bin"); - void* batch_normalization_34_mean = readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_variance_path = dir_prefix + std::string("batch_normalization_34_variance.bin"); - void* batch_normalization_34_variance = readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_35_w_path = dir_prefix + std::string("conv2d_35_w.bin"); - void* conv2d_35_w = readTrainedWeights(conv2d_35_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_35_b_path = dir_prefix + std::string("conv2d_35_b.bin"); - void* conv2d_35_b = readTrainedWeights(conv2d_35_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_gamma_path = dir_prefix + std::string("batch_normalization_35_gamma.bin"); - void* batch_normalization_35_gamma = readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_beta_path = dir_prefix + std::string("batch_normalization_35_beta.bin"); - void* batch_normalization_35_beta = readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_mean_path = dir_prefix + std::string("batch_normalization_35_mean.bin"); - void* batch_normalization_35_mean = readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_variance_path = dir_prefix + std::string("batch_normalization_35_variance.bin"); - void* batch_normalization_35_variance = 
readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_36_w_path = dir_prefix + std::string("conv2d_36_w.bin"); - void* conv2d_36_w = readTrainedWeights(conv2d_36_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_36_b_path = dir_prefix + std::string("conv2d_36_b.bin"); - void* conv2d_36_b = readTrainedWeights(conv2d_36_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_gamma_path = dir_prefix + std::string("batch_normalization_36_gamma.bin"); - void* batch_normalization_36_gamma = readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_beta_path = dir_prefix + std::string("batch_normalization_36_beta.bin"); - void* batch_normalization_36_beta = readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_mean_path = dir_prefix + std::string("batch_normalization_36_mean.bin"); - void* batch_normalization_36_mean = readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_variance_path = dir_prefix + std::string("batch_normalization_36_variance.bin"); - void* batch_normalization_36_variance = readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_37_w_path = dir_prefix + std::string("conv2d_37_w.bin"); - void* conv2d_37_w = readTrainedWeights(conv2d_37_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_37_b_path = dir_prefix + std::string("conv2d_37_b.bin"); - void* conv2d_37_b = readTrainedWeights(conv2d_37_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_gamma_path = dir_prefix + std::string("batch_normalization_37_gamma.bin"); - void* batch_normalization_37_gamma = readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_beta_path = dir_prefix + std::string("batch_normalization_37_beta.bin"); - void* 
batch_normalization_37_beta = readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_mean_path = dir_prefix + std::string("batch_normalization_37_mean.bin"); - void* batch_normalization_37_mean = readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_variance_path = dir_prefix + std::string("batch_normalization_37_variance.bin"); - void* batch_normalization_37_variance = readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_38_w_path = dir_prefix + std::string("conv2d_38_w.bin"); - void* conv2d_38_w = readTrainedWeights(conv2d_38_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_38_b_path = dir_prefix + std::string("conv2d_38_b.bin"); - void* conv2d_38_b = readTrainedWeights(conv2d_38_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_gamma_path = dir_prefix + std::string("batch_normalization_38_gamma.bin"); - void* batch_normalization_38_gamma = readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_beta_path = dir_prefix + std::string("batch_normalization_38_beta.bin"); - void* batch_normalization_38_beta = readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_mean_path = dir_prefix + std::string("batch_normalization_38_mean.bin"); - void* batch_normalization_38_mean = readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_variance_path = dir_prefix + std::string("batch_normalization_38_variance.bin"); - void* batch_normalization_38_variance = readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_39_w_path = dir_prefix + std::string("conv2d_39_w.bin"); - void* conv2d_39_w = readTrainedWeights(conv2d_39_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_39_b_path = 
dir_prefix + std::string("conv2d_39_b.bin"); - void* conv2d_39_b = readTrainedWeights(conv2d_39_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_gamma_path = dir_prefix + std::string("batch_normalization_39_gamma.bin"); - void* batch_normalization_39_gamma = readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_beta_path = dir_prefix + std::string("batch_normalization_39_beta.bin"); - void* batch_normalization_39_beta = readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_mean_path = dir_prefix + std::string("batch_normalization_39_mean.bin"); - void* batch_normalization_39_mean = readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_variance_path = dir_prefix + std::string("batch_normalization_39_variance.bin"); - void* batch_normalization_39_variance = readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_40_w_path = dir_prefix + std::string("conv2d_40_w.bin"); - void* conv2d_40_w = readTrainedWeights(conv2d_40_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_40_b_path = dir_prefix + std::string("conv2d_40_b.bin"); - void* conv2d_40_b = readTrainedWeights(conv2d_40_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_gamma_path = dir_prefix + std::string("batch_normalization_40_gamma.bin"); - void* batch_normalization_40_gamma = readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_beta_path = dir_prefix + std::string("batch_normalization_40_beta.bin"); - void* batch_normalization_40_beta = readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_mean_path = dir_prefix + std::string("batch_normalization_40_mean.bin"); - void* batch_normalization_40_mean = 
readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_variance_path = dir_prefix + std::string("batch_normalization_40_variance.bin"); - void* batch_normalization_40_variance = readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_41_w_path = dir_prefix + std::string("conv2d_41_w.bin"); - void* conv2d_41_w = readTrainedWeights(conv2d_41_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_41_b_path = dir_prefix + std::string("conv2d_41_b.bin"); - void* conv2d_41_b = readTrainedWeights(conv2d_41_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_gamma_path = dir_prefix + std::string("batch_normalization_41_gamma.bin"); - void* batch_normalization_41_gamma = readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_beta_path = dir_prefix + std::string("batch_normalization_41_beta.bin"); - void* batch_normalization_41_beta = readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_mean_path = dir_prefix + std::string("batch_normalization_41_mean.bin"); - void* batch_normalization_41_mean = readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_variance_path = dir_prefix + std::string("batch_normalization_41_variance.bin"); - void* batch_normalization_41_variance = readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_42_w_path = dir_prefix + std::string("conv2d_42_w.bin"); - void* conv2d_42_w = readTrainedWeights(conv2d_42_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_42_b_path = dir_prefix + std::string("conv2d_42_b.bin"); - void* conv2d_42_b = readTrainedWeights(conv2d_42_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_gamma_path = dir_prefix + std::string("batch_normalization_42_gamma.bin"); - void* 
batch_normalization_42_gamma = readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_beta_path = dir_prefix + std::string("batch_normalization_42_beta.bin"); - void* batch_normalization_42_beta = readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_mean_path = dir_prefix + std::string("batch_normalization_42_mean.bin"); - void* batch_normalization_42_mean = readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_variance_path = dir_prefix + std::string("batch_normalization_42_variance.bin"); - void* batch_normalization_42_variance = readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_43_w_path = dir_prefix + std::string("conv2d_43_w.bin"); - void* conv2d_43_w = readTrainedWeights(conv2d_43_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_43_b_path = dir_prefix + std::string("conv2d_43_b.bin"); - void* conv2d_43_b = readTrainedWeights(conv2d_43_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_gamma_path = dir_prefix + std::string("batch_normalization_43_gamma.bin"); - void* batch_normalization_43_gamma = readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_beta_path = dir_prefix + std::string("batch_normalization_43_beta.bin"); - void* batch_normalization_43_beta = readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_mean_path = dir_prefix + std::string("batch_normalization_43_mean.bin"); - void* batch_normalization_43_mean = readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_variance_path = dir_prefix + std::string("batch_normalization_43_variance.bin"); - void* batch_normalization_43_variance = 
readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_44_w_path = dir_prefix + std::string("conv2d_44_w.bin"); - void* conv2d_44_w = readTrainedWeights(conv2d_44_w_path.c_str(), 0,512,1024,1,1); - std::string conv2d_44_b_path = dir_prefix + std::string("conv2d_44_b.bin"); - void* conv2d_44_b = readTrainedWeights(conv2d_44_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_gamma_path = dir_prefix + std::string("batch_normalization_44_gamma.bin"); - void* batch_normalization_44_gamma = readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_beta_path = dir_prefix + std::string("batch_normalization_44_beta.bin"); - void* batch_normalization_44_beta = readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_mean_path = dir_prefix + std::string("batch_normalization_44_mean.bin"); - void* batch_normalization_44_mean = readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_variance_path = dir_prefix + std::string("batch_normalization_44_variance.bin"); - void* batch_normalization_44_variance = readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_45_w_path = dir_prefix + std::string("conv2d_45_w.bin"); - void* conv2d_45_w = readTrainedWeights(conv2d_45_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_45_b_path = dir_prefix + std::string("conv2d_45_b.bin"); - void* conv2d_45_b = readTrainedWeights(conv2d_45_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_gamma_path = dir_prefix + std::string("batch_normalization_45_gamma.bin"); - void* batch_normalization_45_gamma = readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_beta_path = dir_prefix + std::string("batch_normalization_45_beta.bin"); - void* 
batch_normalization_45_beta = readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_mean_path = dir_prefix + std::string("batch_normalization_45_mean.bin"); - void* batch_normalization_45_mean = readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_variance_path = dir_prefix + std::string("batch_normalization_45_variance.bin"); - void* batch_normalization_45_variance = readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_46_w_path = dir_prefix + std::string("conv2d_46_w.bin"); - void* conv2d_46_w = readTrainedWeights(conv2d_46_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_46_b_path = dir_prefix + std::string("conv2d_46_b.bin"); - void* conv2d_46_b = readTrainedWeights(conv2d_46_b_path.c_str(), 0,1,2048,1,1); - std::string conv2d_47_w_path = dir_prefix + std::string("conv2d_47_w.bin"); - void* conv2d_47_w = readTrainedWeights(conv2d_47_w_path.c_str(), 0,2048,1024,1,1); - std::string conv2d_47_b_path = dir_prefix + std::string("conv2d_47_b.bin"); - void* conv2d_47_b = readTrainedWeights(conv2d_47_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_gamma_path = dir_prefix + std::string("batch_normalization_46_gamma.bin"); - void* batch_normalization_46_gamma = readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_beta_path = dir_prefix + std::string("batch_normalization_46_beta.bin"); - void* batch_normalization_46_beta = readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_mean_path = dir_prefix + std::string("batch_normalization_46_mean.bin"); - void* batch_normalization_46_mean = readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_variance_path = dir_prefix + 
std::string("batch_normalization_46_variance.bin"); - void* batch_normalization_46_variance = readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_gamma_path = dir_prefix + std::string("batch_normalization_47_gamma.bin"); - void* batch_normalization_47_gamma = readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_beta_path = dir_prefix + std::string("batch_normalization_47_beta.bin"); - void* batch_normalization_47_beta = readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_mean_path = dir_prefix + std::string("batch_normalization_47_mean.bin"); - void* batch_normalization_47_mean = readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_variance_path = dir_prefix + std::string("batch_normalization_47_variance.bin"); - void* batch_normalization_47_variance = readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_48_w_path = dir_prefix + std::string("conv2d_48_w.bin"); - void* conv2d_48_w = readTrainedWeights(conv2d_48_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_48_b_path = dir_prefix + std::string("conv2d_48_b.bin"); - void* conv2d_48_b = readTrainedWeights(conv2d_48_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_gamma_path = dir_prefix + std::string("batch_normalization_48_gamma.bin"); - void* batch_normalization_48_gamma = readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_beta_path = dir_prefix + std::string("batch_normalization_48_beta.bin"); - void* batch_normalization_48_beta = readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_mean_path = dir_prefix + std::string("batch_normalization_48_mean.bin"); - void* 
batch_normalization_48_mean = readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_variance_path = dir_prefix + std::string("batch_normalization_48_variance.bin"); - void* batch_normalization_48_variance = readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_49_w_path = dir_prefix + std::string("conv2d_49_w.bin"); - void* conv2d_49_w = readTrainedWeights(conv2d_49_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_49_b_path = dir_prefix + std::string("conv2d_49_b.bin"); - void* conv2d_49_b = readTrainedWeights(conv2d_49_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_gamma_path = dir_prefix + std::string("batch_normalization_49_gamma.bin"); - void* batch_normalization_49_gamma = readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_beta_path = dir_prefix + std::string("batch_normalization_49_beta.bin"); - void* batch_normalization_49_beta = readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_mean_path = dir_prefix + std::string("batch_normalization_49_mean.bin"); - void* batch_normalization_49_mean = readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_variance_path = dir_prefix + std::string("batch_normalization_49_variance.bin"); - void* batch_normalization_49_variance = readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_50_w_path = dir_prefix + std::string("conv2d_50_w.bin"); - void* conv2d_50_w = readTrainedWeights(conv2d_50_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_50_b_path = dir_prefix + std::string("conv2d_50_b.bin"); - void* conv2d_50_b = readTrainedWeights(conv2d_50_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_gamma_path = dir_prefix + 
std::string("batch_normalization_50_gamma.bin"); - void* batch_normalization_50_gamma = readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_beta_path = dir_prefix + std::string("batch_normalization_50_beta.bin"); - void* batch_normalization_50_beta = readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_mean_path = dir_prefix + std::string("batch_normalization_50_mean.bin"); - void* batch_normalization_50_mean = readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_variance_path = dir_prefix + std::string("batch_normalization_50_variance.bin"); - void* batch_normalization_50_variance = readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_51_w_path = dir_prefix + std::string("conv2d_51_w.bin"); - void* conv2d_51_w = readTrainedWeights(conv2d_51_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_51_b_path = dir_prefix + std::string("conv2d_51_b.bin"); - void* conv2d_51_b = readTrainedWeights(conv2d_51_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_gamma_path = dir_prefix + std::string("batch_normalization_51_gamma.bin"); - void* batch_normalization_51_gamma = readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_beta_path = dir_prefix + std::string("batch_normalization_51_beta.bin"); - void* batch_normalization_51_beta = readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_mean_path = dir_prefix + std::string("batch_normalization_51_mean.bin"); - void* batch_normalization_51_mean = readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_variance_path = dir_prefix + std::string("batch_normalization_51_variance.bin"); - void* 
batch_normalization_51_variance = readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_52_w_path = dir_prefix + std::string("conv2d_52_w.bin"); - void* conv2d_52_w = readTrainedWeights(conv2d_52_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_52_b_path = dir_prefix + std::string("conv2d_52_b.bin"); - void* conv2d_52_b = readTrainedWeights(conv2d_52_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_gamma_path = dir_prefix + std::string("batch_normalization_52_gamma.bin"); - void* batch_normalization_52_gamma = readTrainedWeights(batch_normalization_52_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_beta_path = dir_prefix + std::string("batch_normalization_52_beta.bin"); - void* batch_normalization_52_beta = readTrainedWeights(batch_normalization_52_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_mean_path = dir_prefix + std::string("batch_normalization_52_mean.bin"); - void* batch_normalization_52_mean = readTrainedWeights(batch_normalization_52_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_variance_path = dir_prefix + std::string("batch_normalization_52_variance.bin"); - void* batch_normalization_52_variance = readTrainedWeights(batch_normalization_52_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_53_w_path = dir_prefix + std::string("conv2d_53_w.bin"); - void* conv2d_53_w = readTrainedWeights(conv2d_53_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_53_b_path = dir_prefix + std::string("conv2d_53_b.bin"); - void* conv2d_53_b = readTrainedWeights(conv2d_53_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_gamma_path = dir_prefix + std::string("batch_normalization_53_gamma.bin"); - void* batch_normalization_53_gamma = readTrainedWeights(batch_normalization_53_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_beta_path = dir_prefix + 
std::string("batch_normalization_53_beta.bin"); - void* batch_normalization_53_beta = readTrainedWeights(batch_normalization_53_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_mean_path = dir_prefix + std::string("batch_normalization_53_mean.bin"); - void* batch_normalization_53_mean = readTrainedWeights(batch_normalization_53_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_variance_path = dir_prefix + std::string("batch_normalization_53_variance.bin"); - void* batch_normalization_53_variance = readTrainedWeights(batch_normalization_53_variance_path.c_str(), 0,1,2048,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,1000); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1000,1,1); - - - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - startMemTracking(); - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - // NOTE: The pooling stride is 3*3 while it should be 2*2 -- interface itself needs fixing -- fix this manually in this case - void* var_0 = ConvLayer_PROMISE2(input, -123.68, 151.061, conv2d_1_w, -0.574422012090683, 0.5646807488203113, conv2d_1_b, -0.004829655, 0.014784645, 3, 3, 2, 2, 0, 3, 2, 1, 0.0, 689.7822875976562, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = ConvLayer_PROMISE(var_1, -4.952117443084717, 12.02118032741582, conv2d_2_w, -0.5448235973715783, 0.2447893574833928, conv2d_2_b, -0.0001412337, 
0.00017318528, 0, 0, 1, 1, -1, 0, -1, -9.212617980003357, 8.107657526016425, 9); - void* var_3 = tensorBatchNorm(var_2, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_4 = tensorRelu(var_3); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 5.801381123542811, conv2d_3_w, -0.18028786177933215, 0.21247629988193606, conv2d_3_b, -7.8663266e-05, 0.00018541634, 1, 1, 1, 1, -1, 0, -1, -6.834556140899658, 8.541351353645396, 9); - void* var_6 = tensorBatchNorm(var_5, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_7 = tensorRelu(var_6); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 9.866454153060971, conv2d_4_w, -0.2255178820490837, 0.2254851074665791, conv2d_4_b, -0.00017080337, 0.00021038808, 0, 0, 1, 1, -1, 0, -1, -3.595476400852203, 3.637018930196785, 9); - void* var_9 = tensorBatchNorm(var_8, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_10 = ConvLayer_PROMISE(var_1, -4.952117443084717, 12.02118032741582, conv2d_5_w, -0.43272915667295453, 0.29589187785983095, conv2d_5_b, -0.000107640364, 0.00013177324, 0, 0, 1, 1, -1, 0, -1, -7.581318395137787, 7.8835730876923265, 9); - void* var_11 = tensorBatchNorm(var_10, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_12 = tensorAdd(var_9, var_11); - void* var_13 = tensorRelu(var_12); - void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 5.885549548625953, conv2d_6_w, -0.17062100511789324, 0.1432653286457067, conv2d_6_b, -7.950033e-05, 0.000104833845, 0, 0, 1, 1, -1, 0, -1, -5.310503073692322, 3.8418860490322224, 9); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* 
var_16 = tensorRelu(var_15); - void* var_17 = ConvLayer_PROMISE(var_16, 0.0, 4.006655237674757, conv2d_7_w, -0.15594010630249977, 0.15720265829563249, conv2d_7_b, -6.419372e-05, 6.503685e-05, 1, 1, 1, 1, -1, 0, -1, -3.4114532544612883, 3.075598966121696, 9); - void* var_18 = tensorBatchNorm(var_17, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_19 = tensorRelu(var_18); - void* var_20 = ConvLayer_PROMISE(var_19, 0.0, 4.186545849800112, conv2d_8_w, -0.1599232355505228, 0.17352246379853484, conv2d_8_b, -8.235522e-05, 0.000105946136, 0, 0, 1, 1, -1, 0, -1, -1.5299443051815034, 1.425760628223422, 9); - void* var_21 = tensorBatchNorm(var_20, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_22 = tensorAdd(var_21, var_13); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 6.36634494018557, conv2d_9_w, -0.14470596650242806, 0.14421831880510708, conv2d_9_b, -3.4270335e-05, 4.177745e-05, 0, 0, 1, 1, -1, 0, -1, -4.584994326114654, 3.8648653411866007, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 3.3001420612335437, conv2d_10_w, -0.12276832074671984, 0.12627632835507407, conv2d_10_b, -5.8183014e-05, 3.3546e-05, 1, 1, 1, 1, -1, 0, -1, -2.828902014493942, 3.0918669717311893, 9); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 5.313344509124818, conv2d_11_w, -0.1685639199912548, 0.16309838759899448, conv2d_11_b, -5.3248757e-05, 5.70645e-05, 0, 0, 1, 1, -1, 0, -1, 
-1.838510752558708, 1.3678752244711045, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorAdd(var_31, var_23); - void* var_33 = tensorRelu(var_32); - void* var_34 = ConvLayer_PROMISE(var_33, 0.0, 6.605899341106429, conv2d_12_w, -0.149728477448225, 0.13948052291572155, conv2d_12_b, -2.5221272e-05, 3.551765e-05, 0, 0, 2, 2, -1, 0, -1, -5.011460402488709, 3.915426737308551, 9); - void* var_35 = tensorBatchNorm(var_34, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_36 = tensorRelu(var_35); - void* var_37 = ConvLayer_PROMISE(var_36, 0.0, 3.794741600990312, conv2d_13_w, -0.09761696971952916, 0.11394361693412249, conv2d_13_b, -3.715329e-05, 2.9298411e-05, 1, 1, 1, 1, -1, 0, -1, -5.206686987876893, 4.520638871669791, 9); - void* var_38 = tensorBatchNorm(var_37, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_39 = tensorRelu(var_38); - void* var_40 = ConvLayer_PROMISE(var_39, 0.0, 3.7149479997158603, conv2d_14_w, -0.14844063371419908, 0.14925702929496953, conv2d_14_b, -6.0864673e-05, 5.4444306e-05, 0, 0, 1, 1, -1, 0, -1, -1.5011818276643754, 1.40834725618366, 9); - void* var_41 = tensorBatchNorm(var_40, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_42 = ConvLayer_PROMISE(var_33, 0.0, 6.605899341106429, conv2d_15_w, -0.1642171936035156, 0.16866817833483497, conv2d_15_b, -2.4068044e-05, 2.5504653e-05, 0, 0, 2, 2, -1, 0, -1, -4.410076716423035, 4.014970501422923, 9); - void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_44 = 
tensorAdd(var_41, var_43); - void* var_45 = tensorRelu(var_44); - void* var_46 = ConvLayer_PROMISE(var_45, 0.0, 6.518892978191488, conv2d_16_w, -0.09702376063913107, 0.1054209597408773, conv2d_16_b, -1.47610735e-05, 1.7075112e-05, 0, 0, 1, 1, -1, 0, -1, -4.87446900844574, 3.7661991298198862, 9); - void* var_47 = tensorBatchNorm(var_46, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_48 = tensorRelu(var_47); - void* var_49 = ConvLayer_PROMISE(var_48, 0.0, 3.259194364786183, conv2d_17_w, -0.08665236312896013, 0.0898308474570517, conv2d_17_b, -3.9163042e-05, 4.2771928e-05, 1, 1, 1, 1, -1, 0, -1, -2.673636848211288, 2.3574042041302774, 9); - void* var_50 = tensorBatchNorm(var_49, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_51 = tensorRelu(var_50); - void* var_52 = ConvLayer_PROMISE(var_51, 0.0, 3.641261647939746, conv2d_18_w, -0.12198246002197266, 0.1347003544867095, conv2d_18_b, -5.3173797e-05, 4.8076203e-05, 0, 0, 1, 1, -1, 0, -1, -1.0623184064626694, 0.916913630664359, 9); - void* var_53 = tensorBatchNorm(var_52, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_54 = tensorAdd(var_53, var_45); - void* var_55 = tensorRelu(var_54); - void* var_56 = ConvLayer_PROMISE(var_55, 0.0, 6.852215012073557, conv2d_19_w, -0.1122598509863019, 0.1435348897427337, conv2d_19_b, -1.20778e-05, 2.599136e-05, 0, 0, 1, 1, -1, 0, -1, -6.0281127138137816, 6.227049376964593, 9); - void* var_57 = tensorBatchNorm(var_56, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_58 = tensorRelu(var_57); - void* var_59 = ConvLayer_PROMISE(var_58, 0.0, 3.397107238292711, conv2d_20_w, -0.1049889962002635, 0.1349111200869117, conv2d_20_b, 
-2.7412994e-05, 3.9722e-05, 1, 1, 1, 1, -1, 0, -1, -4.057081372261047, 4.329259678363884, 9); - void* var_60 = tensorBatchNorm(var_59, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_61 = tensorRelu(var_60); - void* var_62 = ConvLayer_PROMISE(var_61, 0.0, 3.6484641625881262, conv2d_21_w, -0.1401274445652962, 0.12122062336653527, conv2d_21_b, -5.5854776e-05, 7.8164114e-05, 0, 0, 1, 1, -1, 0, -1, -1.626526164531708, 0.8401960272193048, 9); - void* var_63 = tensorBatchNorm(var_62, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_64 = tensorAdd(var_63, var_55); - void* var_65 = tensorRelu(var_64); - void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 6.820035747528095, conv2d_22_w, -0.16039140529930593, 0.18889211259782335, conv2d_22_b, -4.6078047e-05, 3.3613425e-05, 0, 0, 1, 1, -1, 0, -1, -4.6271090393066405, 4.527790556430912, 9); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = ConvLayer_PROMISE(var_68, 0.0, 4.432856665611537, conv2d_23_w, -0.11397356178611517, 0.10787127982825667, conv2d_23_b, -3.6726604e-05, 2.4220695e-05, 1, 1, 1, 1, -1, 0, -1, -3.697339488506317, 3.1427979104519426, 9); - void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_71 = tensorRelu(var_70); - void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 4.711423307418915, conv2d_24_w, -0.11341997660696507, 0.1437816035747536, conv2d_24_b, -2.7102393e-05, 3.091236e-05, 0, 0, 1, 1, -1, 0, -1, -1.4133628906011582, 1.2987316379547167, 9); - void* var_73 = tensorBatchNorm(var_72, batch_normalization_24_gamma, 
batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_74 = tensorAdd(var_73, var_65); - void* var_75 = tensorRelu(var_74); - void* var_76 = ConvLayer_PROMISE(var_75, 0.0, 7.624651549339404, conv2d_25_w, -0.10495923960208893, 0.12068889104576047, conv2d_25_b, -1.0208429e-05, 1.1486276e-05, 0, 0, 2, 2, -1, 0, -1, -3.87531214427948, 3.676609352588745, 9); - void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_78 = tensorRelu(var_77); - void* var_79 = ConvLayer_PROMISE(var_78, 0.0, 4.044620439529737, conv2d_26_w, -0.07615160812437534, 0.07977425544709099, conv2d_26_b, -2.4272886e-05, 1.6434806e-05, 1, 1, 1, 1, -1, 0, -1, -6.102653044223786, 4.761939919948585, 9); - void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_81 = tensorRelu(var_80); - void* var_82 = ConvLayer_PROMISE(var_81, 0.0, 3.4468260111809705, conv2d_27_w, -0.11533496034890414, 0.10714908299595141, conv2d_27_b, -3.225456e-05, 4.8422902e-05, 0, 0, 1, 1, -1, 0, -1, -1.319659793496132, 1.0189965035915467, 9); - void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_84 = ConvLayer_PROMISE(var_75, 0.0, 7.624651549339404, conv2d_28_w, -0.0966497472524643, 0.10240990699082783, conv2d_28_b, -1.4815519e-05, 1.554276e-05, 0, 0, 2, 2, -1, 0, -1, -3.9412443549633025, 3.863056869030064, 9); - void* var_85 = tensorBatchNorm(var_84, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); - void* var_86 = tensorAdd(var_83, var_85); - void* var_87 = tensorRelu(var_86); - void* var_88 = ConvLayer_PROMISE(var_87, 0.0, 
6.879177100658442, conv2d_29_w, -0.06468586190789938, 0.08113565444201333, conv2d_29_b, -7.4607115e-06, 6.926009e-06, 0, 0, 1, 1, -1, 0, -1, -7.112777866363525, 4.633408185959027, 9); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_91 = ConvLayer_PROMISE(var_90, 0.0, 3.2354076790810105, conv2d_30_w, -0.06493933162838221, 0.07104272978752861, conv2d_30_b, -1.9349398e-05, 2.0178473e-05, 1, 1, 1, 1, -1, 0, -1, -3.226332322359085, 2.5138739056587447, 9); - void* var_92 = tensorBatchNorm(var_91, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); - void* var_93 = tensorRelu(var_92); - void* var_94 = ConvLayer_PROMISE(var_93, 0.0, 3.003848925829006, conv2d_31_w, -0.0918996930718422, 0.08853508594632167, conv2d_31_b, -4.2279236e-05, 5.5378885e-05, 0, 0, 1, 1, -1, 0, -1, -0.9247466986179351, 0.572747143149404, 9); - void* var_95 = tensorBatchNorm(var_94, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); - void* var_96 = tensorAdd(var_95, var_87); - void* var_97 = tensorRelu(var_96); - void* var_98 = ConvLayer_PROMISE(var_97, 0.0, 6.566591289043519, conv2d_32_w, -0.07145480328053236, 0.09098157961666606, conv2d_32_b, -1.0478255e-05, 1.4408147e-05, 0, 0, 1, 1, -1, 0, -1, -4.183038790225982, 3.5941159300804166, 9); - void* var_99 = tensorBatchNorm(var_98, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 0.001); - void* var_100 = tensorRelu(var_99); - void* var_101 = ConvLayer_PROMISE(var_100, 0.0, 3.0348211803436556, conv2d_33_w, -0.056237234909087414, 0.06478620118647821, conv2d_33_b, -2.2639133e-05, 2.6081116e-05, 1, 1, 1, 1, -1, 0, -1, -2.098393235206604, 1.706788736581844, 9); - 
void* var_102 = tensorBatchNorm(var_101, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); - void* var_103 = tensorRelu(var_102); - void* var_104 = ConvLayer_PROMISE(var_103, 0.0, 3.248518852949145, conv2d_34_w, -0.07141499005258084, 0.08281665176153225, conv2d_34_b, -3.221229e-05, 4.569047e-05, 0, 0, 1, 1, -1, 0, -1, -0.8273181943893433, 0.7378616912961369, 9); - void* var_105 = tensorBatchNorm(var_104, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); - void* var_106 = tensorAdd(var_105, var_97); - void* var_107 = tensorRelu(var_106); - void* var_108 = ConvLayer_PROMISE(var_107, 0.0, 6.7038991017341765, conv2d_35_w, -0.06838216692209244, 0.09303134681284767, conv2d_35_b, -1.047402e-05, 1.0168567e-05, 0, 0, 1, 1, -1, 0, -1, -4.168091129779816, 3.5077465448380494, 9); - void* var_109 = tensorBatchNorm(var_108, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); - void* var_110 = tensorRelu(var_109); - void* var_111 = ConvLayer_PROMISE(var_110, 0.0, 2.8976624414922814, conv2d_36_w, -0.05521866928786039, 0.06331418491154919, conv2d_36_b, -3.86494e-05, 2.5999781e-05, 1, 1, 1, 1, -1, 0, -1, -2.182177306175232, 2.0366714165211324, 9); - void* var_112 = tensorBatchNorm(var_111, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); - void* var_113 = tensorRelu(var_112); - void* var_114 = ConvLayer_PROMISE(var_113, 0.0, 3.1310220296382933, conv2d_37_w, -0.07256266868114472, 0.08391195811331292, conv2d_37_b, -4.8211587e-05, 4.7546604e-05, 0, 0, 1, 1, -1, 0, -1, -1.1372777166366577, 0.5528145518899268, 9); - void* var_115 = tensorBatchNorm(var_114, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 
0.001); - void* var_116 = tensorAdd(var_115, var_107); - void* var_117 = tensorRelu(var_116); - void* var_118 = ConvLayer_PROMISE(var_117, 0.0, 6.625923678875129, conv2d_38_w, -0.06549047549813986, 0.10113389839232205, conv2d_38_b, -1.2351429e-05, 9.263066e-06, 0, 0, 1, 1, -1, 0, -1, -3.846879935503006, 3.639795066118241, 9); - void* var_119 = tensorBatchNorm(var_118, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); - void* var_120 = tensorRelu(var_119); - void* var_121 = ConvLayer_PROMISE(var_120, 0.0, 3.200671393632918, conv2d_39_w, -0.05184716333821415, 0.06296417640149599, conv2d_39_b, -2.4313656e-05, 3.812053e-05, 1, 1, 1, 1, -1, 0, -1, -1.9442583957910538, 1.5269825316667864, 9); - void* var_122 = tensorBatchNorm(var_121, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); - void* var_123 = tensorRelu(var_122); - void* var_124 = ConvLayer_PROMISE(var_123, 0.0, 4.040827783107826, conv2d_40_w, -0.0670140995979309, 0.0777734544128187, conv2d_40_b, -3.378767e-05, 2.5727571e-05, 0, 0, 1, 1, -1, 0, -1, -1.3243955926895141, 0.9261298480034093, 9); - void* var_125 = tensorBatchNorm(var_124, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); - void* var_126 = tensorAdd(var_125, var_117); - void* var_127 = tensorRelu(var_126); - void* var_128 = ConvLayer_PROMISE(var_127, 0.0, 6.8198375024796505, conv2d_41_w, -0.0710306192561984, 0.10828035335987954, conv2d_41_b, -1.3110192e-05, 1.5449377e-05, 0, 0, 1, 1, -1, 0, -1, -3.2434056091308596, 5.530628140926378, 9); - void* var_129 = tensorBatchNorm(var_128, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); - void* var_130 = tensorRelu(var_129); - void* var_131 = ConvLayer_PROMISE(var_130, 0.0, 4.811174154282, 
conv2d_42_w, -0.056100725468248125, 0.06774817473441476, conv2d_42_b, -2.7899796e-05, 3.0695155e-05, 1, 1, 1, 1, -1, 0, -1, -3.553957043647766, 3.0058912243844595, 9); - void* var_132 = tensorBatchNorm(var_131, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); - void* var_133 = tensorRelu(var_132); - void* var_134 = ConvLayer_PROMISE(var_133, 0.0, 6.503577950477883, conv2d_43_w, -0.06820484285801648, 0.0836490480080298, conv2d_43_b, -2.2592936e-05, 2.3876093e-05, 0, 0, 1, 1, -1, 0, -1, -2.760284422159195, 1.1501846584081763, 9); - void* var_135 = tensorBatchNorm(var_134, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); - void* var_136 = tensorAdd(var_135, var_127); - void* var_137 = tensorRelu(var_136); - void* var_138 = ConvLayer_PROMISE(var_137, 0.0, 7.423539982796591, conv2d_44_w, -0.06768814034759998, 0.07900290366262253, conv2d_44_b, -1.0954906e-05, 1.2313803e-05, 0, 0, 2, 2, -1, 0, -1, -3.8250768241882325, 3.133637444972998, 9); - void* var_139 = tensorBatchNorm(var_138, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); - void* var_140 = tensorRelu(var_139); - void* var_141 = ConvLayer_PROMISE(var_140, 0.0, 3.234270730257073, conv2d_45_w, -0.04219715926796198, 0.04603923132643117, conv2d_45_b, -1.9525614e-05, 2.6300824e-05, 1, 1, 1, 1, -1, 0, -1, -3.2753402066230777, 1.8960905054807824, 9); - void* var_142 = tensorBatchNorm(var_141, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, batch_normalization_45_variance, 0.001); - void* var_143 = tensorRelu(var_142); - void* var_144 = ConvLayer_PROMISE(var_143, 0.0, 2.675833512783051, conv2d_46_w, -0.051137199997901915, 0.07428906522691328, conv2d_46_b, -2.6416203e-05, 3.079251e-05, 0, 0, 1, 1, -1, 0, -1, -0.6374539139270782, 0.6678488029241574, 
9); - void* var_145 = tensorBatchNorm(var_144, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); - void* var_146 = ConvLayer_PROMISE(var_137, 0.0, 7.423539982796591, conv2d_47_w, -0.047168924897909165, 0.06949675244092963, conv2d_47_b, -1.2322937e-05, 2.1868867e-05, 0, 0, 2, 2, -1, 0, -1, -1.8896190267801285, 2.387520755291127, 9); - void* var_147 = tensorBatchNorm(var_146, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); - void* var_148 = tensorAdd(var_145, var_147); - void* var_149 = tensorRelu(var_148); - void* var_150 = ConvLayer_PROMISE(var_149, 0.0, 12.392736603737378, conv2d_48_w, -0.04417608780786395, 0.06200448917225007, conv2d_48_b, -6.6323187e-06, 7.1494946e-06, 0, 0, 1, 1, -1, 0, -1, -9.068103209495545, 5.912482521057253, 9); - void* var_151 = tensorBatchNorm(var_150, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); - void* var_152 = tensorRelu(var_151); - void* var_153 = ConvLayer_PROMISE(var_152, 0.0, 2.565971518278122, conv2d_49_w, -0.036550714168697596, 0.042889032773673605, conv2d_49_b, -3.1749918e-05, 3.1403273e-05, 1, 1, 1, 1, -1, 0, -1, -2.0715825698375703, 1.4426317431927056, 9); - void* var_154 = tensorBatchNorm(var_153, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); - void* var_155 = tensorRelu(var_154); - void* var_156 = ConvLayer_PROMISE(var_155, 0.0, 2.2121606218814973, conv2d_50_w, -0.04563436089083552, 0.07235725801438761, conv2d_50_b, -5.138708e-05, 5.6959605e-05, 0, 0, 1, 1, -1, 0, -1, -0.5048498404622078, 0.4972966857850613, 9); - void* var_157 = tensorBatchNorm(var_156, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); - void* var_158 = 
tensorAdd(var_157, var_149); - void* var_159 = tensorRelu(var_158); - void* var_160 = ConvLayer_PROMISE(var_159, 0.0, 12.996321228027455, conv2d_51_w, -0.051894455961883065, 0.07700131461024579, conv2d_51_b, -8.893526e-06, 7.6235174e-06, 0, 0, 1, 1, -1, 0, -1, -7.534810958862305, 7.1688279371266015, 9); - void* var_161 = tensorBatchNorm(var_160, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); - void* var_162 = tensorRelu(var_161); - void* var_163 = ConvLayer_PROMISE(var_162, 0.0, 2.806837086677553, conv2d_52_w, -0.032556386385113004, 0.038920990321785316, conv2d_52_b, -3.1544037e-05, 4.5056524e-05, 1, 1, 1, 1, -1, 0, -1, -1.6795331789255141, 0.9551341712474886, 9); - void* var_164 = tensorBatchNorm(var_163, batch_normalization_52_gamma, batch_normalization_52_beta, batch_normalization_52_mean, batch_normalization_52_variance, 0.001); - void* var_165 = tensorRelu(var_164); - void* var_166 = ConvLayer_PROMISE(var_165, 0.0, 2.7935527668000724, conv2d_53_w, -0.04313115822151303, 0.0774340439587877, conv2d_53_b, -2.8713988e-05, 4.1641888e-05, 0, 0, 1, 1, -1, 0, -1, -0.5173906384706497, 0.5710835611820362, 9); - void* var_167 = tensorBatchNorm(var_166, batch_normalization_53_gamma, batch_normalization_53_beta, batch_normalization_53_mean, batch_normalization_53_variance, 0.001); - void* var_168 = tensorAdd(var_167, var_159); - void* var_169 = tensorRelu(var_168); - void* var_170 = tensorPooling(var_169,1,7,7,0,0,7,7); - void* var_171 = FCLayer_PROMISE(var_170, 0.0, 5.305631495475859, dense_1_w, -0.09220413094758988, 0.24919447432458666, dense_1_b, -0.024729362, 0.028545722, -1, -6.579668023586273, 7.794472872257277, 9); - void* var_172 = tensorSoftmax(var_171); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_172); - final_accuracy += accuracy; - - dumpAccuracyNorms(); - - - freeBatchMemory(); - - } - - final_accuracy = 
final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_promise.cc deleted file mode 100644 index 8355c78cb553926759201fd070ced79e6a59f0b9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet50_imagenet_promise.cc +++ /dev/null @@ -1,874 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 2000; - int batch_size = 100; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,7,7); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + 
std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,1,1); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_3_b_path = 
dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* 
batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string 
batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 
0,256,64,1,1); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = 
readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma 
= readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,128,256,1,1); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,128,1,1); - 
std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,512,256,1,1); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* 
batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = 
readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,128,1,1); - std::string 
batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 
0,1,128,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_21_w_path = dir_prefix + 
std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_22_w_path = dir_prefix + std::string("conv2d_22_w.bin"); - void* conv2d_22_w = readTrainedWeights(conv2d_22_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_22_b_path = dir_prefix + std::string("conv2d_22_b.bin"); - void* conv2d_22_b = readTrainedWeights(conv2d_22_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,128,1,1); - std::string 
batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_23_w_path = dir_prefix + std::string("conv2d_23_w.bin"); - void* conv2d_23_w = readTrainedWeights(conv2d_23_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_23_b_path = dir_prefix + std::string("conv2d_23_b.bin"); - void* conv2d_23_b = readTrainedWeights(conv2d_23_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_24_w_path = dir_prefix + std::string("conv2d_24_w.bin"); - void* conv2d_24_w = readTrainedWeights(conv2d_24_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_24_b_path = dir_prefix + std::string("conv2d_24_b.bin"); - void* conv2d_24_b = readTrainedWeights(conv2d_24_b_path.c_str(), 0,1,512,1,1); 
- std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_25_w_path = dir_prefix + std::string("conv2d_25_w.bin"); - void* conv2d_25_w = readTrainedWeights(conv2d_25_w_path.c_str(), 0,256,512,1,1); - std::string conv2d_25_b_path = dir_prefix + std::string("conv2d_25_b.bin"); - void* conv2d_25_b = readTrainedWeights(conv2d_25_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + 
std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_26_w_path = dir_prefix + std::string("conv2d_26_w.bin"); - void* conv2d_26_w = readTrainedWeights(conv2d_26_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_26_b_path = dir_prefix + std::string("conv2d_26_b.bin"); - void* conv2d_26_b = readTrainedWeights(conv2d_26_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_27_w_path = dir_prefix + std::string("conv2d_27_w.bin"); - void* conv2d_27_w = readTrainedWeights(conv2d_27_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_27_b_path = dir_prefix + std::string("conv2d_27_b.bin"); - void* conv2d_27_b = readTrainedWeights(conv2d_27_b_path.c_str(), 0,1,1024,1,1); - std::string conv2d_28_w_path = dir_prefix + std::string("conv2d_28_w.bin"); - void* conv2d_28_w = readTrainedWeights(conv2d_28_w_path.c_str(), 0,1024,512,1,1); - std::string conv2d_28_b_path = dir_prefix + std::string("conv2d_28_b.bin"); - void* conv2d_28_b = 
readTrainedWeights(conv2d_28_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_gamma_path = dir_prefix + std::string("batch_normalization_28_gamma.bin"); - void* batch_normalization_28_gamma = readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_beta_path = dir_prefix + std::string("batch_normalization_28_beta.bin"); - void* batch_normalization_28_beta = readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_mean_path = dir_prefix + std::string("batch_normalization_28_mean.bin"); - void* batch_normalization_28_mean = readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_variance_path = dir_prefix + std::string("batch_normalization_28_variance.bin"); - void* batch_normalization_28_variance = readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_29_w_path = dir_prefix + std::string("conv2d_29_w.bin"); - void* conv2d_29_w = 
readTrainedWeights(conv2d_29_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_29_b_path = dir_prefix + std::string("conv2d_29_b.bin"); - void* conv2d_29_b = readTrainedWeights(conv2d_29_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_gamma_path = dir_prefix + std::string("batch_normalization_29_gamma.bin"); - void* batch_normalization_29_gamma = readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_beta_path = dir_prefix + std::string("batch_normalization_29_beta.bin"); - void* batch_normalization_29_beta = readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_mean_path = dir_prefix + std::string("batch_normalization_29_mean.bin"); - void* batch_normalization_29_mean = readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_variance_path = dir_prefix + std::string("batch_normalization_29_variance.bin"); - void* batch_normalization_29_variance = readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_30_w_path = dir_prefix + std::string("conv2d_30_w.bin"); - void* conv2d_30_w = readTrainedWeights(conv2d_30_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_30_b_path = dir_prefix + std::string("conv2d_30_b.bin"); - void* conv2d_30_b = readTrainedWeights(conv2d_30_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_gamma_path = dir_prefix + std::string("batch_normalization_30_gamma.bin"); - void* batch_normalization_30_gamma = readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_beta_path = dir_prefix + std::string("batch_normalization_30_beta.bin"); - void* batch_normalization_30_beta = readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_mean_path = dir_prefix + 
std::string("batch_normalization_30_mean.bin"); - void* batch_normalization_30_mean = readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_variance_path = dir_prefix + std::string("batch_normalization_30_variance.bin"); - void* batch_normalization_30_variance = readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_31_w_path = dir_prefix + std::string("conv2d_31_w.bin"); - void* conv2d_31_w = readTrainedWeights(conv2d_31_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_31_b_path = dir_prefix + std::string("conv2d_31_b.bin"); - void* conv2d_31_b = readTrainedWeights(conv2d_31_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_gamma_path = dir_prefix + std::string("batch_normalization_31_gamma.bin"); - void* batch_normalization_31_gamma = readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_beta_path = dir_prefix + std::string("batch_normalization_31_beta.bin"); - void* batch_normalization_31_beta = readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_mean_path = dir_prefix + std::string("batch_normalization_31_mean.bin"); - void* batch_normalization_31_mean = readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_variance_path = dir_prefix + std::string("batch_normalization_31_variance.bin"); - void* batch_normalization_31_variance = readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_32_w_path = dir_prefix + std::string("conv2d_32_w.bin"); - void* conv2d_32_w = readTrainedWeights(conv2d_32_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_32_b_path = dir_prefix + std::string("conv2d_32_b.bin"); - void* conv2d_32_b = readTrainedWeights(conv2d_32_b_path.c_str(), 0,1,256,1,1); - std::string 
batch_normalization_32_gamma_path = dir_prefix + std::string("batch_normalization_32_gamma.bin"); - void* batch_normalization_32_gamma = readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_beta_path = dir_prefix + std::string("batch_normalization_32_beta.bin"); - void* batch_normalization_32_beta = readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_mean_path = dir_prefix + std::string("batch_normalization_32_mean.bin"); - void* batch_normalization_32_mean = readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_variance_path = dir_prefix + std::string("batch_normalization_32_variance.bin"); - void* batch_normalization_32_variance = readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_33_w_path = dir_prefix + std::string("conv2d_33_w.bin"); - void* conv2d_33_w = readTrainedWeights(conv2d_33_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_33_b_path = dir_prefix + std::string("conv2d_33_b.bin"); - void* conv2d_33_b = readTrainedWeights(conv2d_33_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_gamma_path = dir_prefix + std::string("batch_normalization_33_gamma.bin"); - void* batch_normalization_33_gamma = readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_beta_path = dir_prefix + std::string("batch_normalization_33_beta.bin"); - void* batch_normalization_33_beta = readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_mean_path = dir_prefix + std::string("batch_normalization_33_mean.bin"); - void* batch_normalization_33_mean = readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_variance_path = dir_prefix + 
std::string("batch_normalization_33_variance.bin"); - void* batch_normalization_33_variance = readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_34_w_path = dir_prefix + std::string("conv2d_34_w.bin"); - void* conv2d_34_w = readTrainedWeights(conv2d_34_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_34_b_path = dir_prefix + std::string("conv2d_34_b.bin"); - void* conv2d_34_b = readTrainedWeights(conv2d_34_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_gamma_path = dir_prefix + std::string("batch_normalization_34_gamma.bin"); - void* batch_normalization_34_gamma = readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_beta_path = dir_prefix + std::string("batch_normalization_34_beta.bin"); - void* batch_normalization_34_beta = readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_mean_path = dir_prefix + std::string("batch_normalization_34_mean.bin"); - void* batch_normalization_34_mean = readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_variance_path = dir_prefix + std::string("batch_normalization_34_variance.bin"); - void* batch_normalization_34_variance = readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_35_w_path = dir_prefix + std::string("conv2d_35_w.bin"); - void* conv2d_35_w = readTrainedWeights(conv2d_35_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_35_b_path = dir_prefix + std::string("conv2d_35_b.bin"); - void* conv2d_35_b = readTrainedWeights(conv2d_35_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_gamma_path = dir_prefix + std::string("batch_normalization_35_gamma.bin"); - void* batch_normalization_35_gamma = readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,256,1,1); - std::string 
batch_normalization_35_beta_path = dir_prefix + std::string("batch_normalization_35_beta.bin"); - void* batch_normalization_35_beta = readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_mean_path = dir_prefix + std::string("batch_normalization_35_mean.bin"); - void* batch_normalization_35_mean = readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_variance_path = dir_prefix + std::string("batch_normalization_35_variance.bin"); - void* batch_normalization_35_variance = readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_36_w_path = dir_prefix + std::string("conv2d_36_w.bin"); - void* conv2d_36_w = readTrainedWeights(conv2d_36_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_36_b_path = dir_prefix + std::string("conv2d_36_b.bin"); - void* conv2d_36_b = readTrainedWeights(conv2d_36_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_gamma_path = dir_prefix + std::string("batch_normalization_36_gamma.bin"); - void* batch_normalization_36_gamma = readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_beta_path = dir_prefix + std::string("batch_normalization_36_beta.bin"); - void* batch_normalization_36_beta = readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_mean_path = dir_prefix + std::string("batch_normalization_36_mean.bin"); - void* batch_normalization_36_mean = readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_variance_path = dir_prefix + std::string("batch_normalization_36_variance.bin"); - void* batch_normalization_36_variance = readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_37_w_path = dir_prefix + std::string("conv2d_37_w.bin"); - void* 
conv2d_37_w = readTrainedWeights(conv2d_37_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_37_b_path = dir_prefix + std::string("conv2d_37_b.bin"); - void* conv2d_37_b = readTrainedWeights(conv2d_37_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_gamma_path = dir_prefix + std::string("batch_normalization_37_gamma.bin"); - void* batch_normalization_37_gamma = readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_beta_path = dir_prefix + std::string("batch_normalization_37_beta.bin"); - void* batch_normalization_37_beta = readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_mean_path = dir_prefix + std::string("batch_normalization_37_mean.bin"); - void* batch_normalization_37_mean = readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_variance_path = dir_prefix + std::string("batch_normalization_37_variance.bin"); - void* batch_normalization_37_variance = readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_38_w_path = dir_prefix + std::string("conv2d_38_w.bin"); - void* conv2d_38_w = readTrainedWeights(conv2d_38_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_38_b_path = dir_prefix + std::string("conv2d_38_b.bin"); - void* conv2d_38_b = readTrainedWeights(conv2d_38_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_gamma_path = dir_prefix + std::string("batch_normalization_38_gamma.bin"); - void* batch_normalization_38_gamma = readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_beta_path = dir_prefix + std::string("batch_normalization_38_beta.bin"); - void* batch_normalization_38_beta = readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_mean_path = dir_prefix + 
std::string("batch_normalization_38_mean.bin"); - void* batch_normalization_38_mean = readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_variance_path = dir_prefix + std::string("batch_normalization_38_variance.bin"); - void* batch_normalization_38_variance = readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_39_w_path = dir_prefix + std::string("conv2d_39_w.bin"); - void* conv2d_39_w = readTrainedWeights(conv2d_39_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_39_b_path = dir_prefix + std::string("conv2d_39_b.bin"); - void* conv2d_39_b = readTrainedWeights(conv2d_39_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_gamma_path = dir_prefix + std::string("batch_normalization_39_gamma.bin"); - void* batch_normalization_39_gamma = readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_beta_path = dir_prefix + std::string("batch_normalization_39_beta.bin"); - void* batch_normalization_39_beta = readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_mean_path = dir_prefix + std::string("batch_normalization_39_mean.bin"); - void* batch_normalization_39_mean = readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_variance_path = dir_prefix + std::string("batch_normalization_39_variance.bin"); - void* batch_normalization_39_variance = readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_40_w_path = dir_prefix + std::string("conv2d_40_w.bin"); - void* conv2d_40_w = readTrainedWeights(conv2d_40_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_40_b_path = dir_prefix + std::string("conv2d_40_b.bin"); - void* conv2d_40_b = readTrainedWeights(conv2d_40_b_path.c_str(), 0,1,1024,1,1); - std::string 
batch_normalization_40_gamma_path = dir_prefix + std::string("batch_normalization_40_gamma.bin"); - void* batch_normalization_40_gamma = readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_beta_path = dir_prefix + std::string("batch_normalization_40_beta.bin"); - void* batch_normalization_40_beta = readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_mean_path = dir_prefix + std::string("batch_normalization_40_mean.bin"); - void* batch_normalization_40_mean = readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_variance_path = dir_prefix + std::string("batch_normalization_40_variance.bin"); - void* batch_normalization_40_variance = readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_41_w_path = dir_prefix + std::string("conv2d_41_w.bin"); - void* conv2d_41_w = readTrainedWeights(conv2d_41_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_41_b_path = dir_prefix + std::string("conv2d_41_b.bin"); - void* conv2d_41_b = readTrainedWeights(conv2d_41_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_gamma_path = dir_prefix + std::string("batch_normalization_41_gamma.bin"); - void* batch_normalization_41_gamma = readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_beta_path = dir_prefix + std::string("batch_normalization_41_beta.bin"); - void* batch_normalization_41_beta = readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_mean_path = dir_prefix + std::string("batch_normalization_41_mean.bin"); - void* batch_normalization_41_mean = readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_variance_path = dir_prefix + 
std::string("batch_normalization_41_variance.bin"); - void* batch_normalization_41_variance = readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_42_w_path = dir_prefix + std::string("conv2d_42_w.bin"); - void* conv2d_42_w = readTrainedWeights(conv2d_42_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_42_b_path = dir_prefix + std::string("conv2d_42_b.bin"); - void* conv2d_42_b = readTrainedWeights(conv2d_42_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_gamma_path = dir_prefix + std::string("batch_normalization_42_gamma.bin"); - void* batch_normalization_42_gamma = readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_beta_path = dir_prefix + std::string("batch_normalization_42_beta.bin"); - void* batch_normalization_42_beta = readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_mean_path = dir_prefix + std::string("batch_normalization_42_mean.bin"); - void* batch_normalization_42_mean = readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_variance_path = dir_prefix + std::string("batch_normalization_42_variance.bin"); - void* batch_normalization_42_variance = readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_43_w_path = dir_prefix + std::string("conv2d_43_w.bin"); - void* conv2d_43_w = readTrainedWeights(conv2d_43_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_43_b_path = dir_prefix + std::string("conv2d_43_b.bin"); - void* conv2d_43_b = readTrainedWeights(conv2d_43_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_gamma_path = dir_prefix + std::string("batch_normalization_43_gamma.bin"); - void* batch_normalization_43_gamma = readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,1024,1,1); - std::string 
batch_normalization_43_beta_path = dir_prefix + std::string("batch_normalization_43_beta.bin"); - void* batch_normalization_43_beta = readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_mean_path = dir_prefix + std::string("batch_normalization_43_mean.bin"); - void* batch_normalization_43_mean = readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_variance_path = dir_prefix + std::string("batch_normalization_43_variance.bin"); - void* batch_normalization_43_variance = readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_44_w_path = dir_prefix + std::string("conv2d_44_w.bin"); - void* conv2d_44_w = readTrainedWeights(conv2d_44_w_path.c_str(), 0,512,1024,1,1); - std::string conv2d_44_b_path = dir_prefix + std::string("conv2d_44_b.bin"); - void* conv2d_44_b = readTrainedWeights(conv2d_44_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_gamma_path = dir_prefix + std::string("batch_normalization_44_gamma.bin"); - void* batch_normalization_44_gamma = readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_beta_path = dir_prefix + std::string("batch_normalization_44_beta.bin"); - void* batch_normalization_44_beta = readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_mean_path = dir_prefix + std::string("batch_normalization_44_mean.bin"); - void* batch_normalization_44_mean = readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_variance_path = dir_prefix + std::string("batch_normalization_44_variance.bin"); - void* batch_normalization_44_variance = readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_45_w_path = dir_prefix + std::string("conv2d_45_w.bin"); - void* 
conv2d_45_w = readTrainedWeights(conv2d_45_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_45_b_path = dir_prefix + std::string("conv2d_45_b.bin"); - void* conv2d_45_b = readTrainedWeights(conv2d_45_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_gamma_path = dir_prefix + std::string("batch_normalization_45_gamma.bin"); - void* batch_normalization_45_gamma = readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_beta_path = dir_prefix + std::string("batch_normalization_45_beta.bin"); - void* batch_normalization_45_beta = readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_mean_path = dir_prefix + std::string("batch_normalization_45_mean.bin"); - void* batch_normalization_45_mean = readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_variance_path = dir_prefix + std::string("batch_normalization_45_variance.bin"); - void* batch_normalization_45_variance = readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_46_w_path = dir_prefix + std::string("conv2d_46_w.bin"); - void* conv2d_46_w = readTrainedWeights(conv2d_46_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_46_b_path = dir_prefix + std::string("conv2d_46_b.bin"); - void* conv2d_46_b = readTrainedWeights(conv2d_46_b_path.c_str(), 0,1,2048,1,1); - std::string conv2d_47_w_path = dir_prefix + std::string("conv2d_47_w.bin"); - void* conv2d_47_w = readTrainedWeights(conv2d_47_w_path.c_str(), 0,2048,1024,1,1); - std::string conv2d_47_b_path = dir_prefix + std::string("conv2d_47_b.bin"); - void* conv2d_47_b = readTrainedWeights(conv2d_47_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_gamma_path = dir_prefix + std::string("batch_normalization_46_gamma.bin"); - void* batch_normalization_46_gamma = 
readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_beta_path = dir_prefix + std::string("batch_normalization_46_beta.bin"); - void* batch_normalization_46_beta = readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_mean_path = dir_prefix + std::string("batch_normalization_46_mean.bin"); - void* batch_normalization_46_mean = readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_variance_path = dir_prefix + std::string("batch_normalization_46_variance.bin"); - void* batch_normalization_46_variance = readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_gamma_path = dir_prefix + std::string("batch_normalization_47_gamma.bin"); - void* batch_normalization_47_gamma = readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_beta_path = dir_prefix + std::string("batch_normalization_47_beta.bin"); - void* batch_normalization_47_beta = readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_mean_path = dir_prefix + std::string("batch_normalization_47_mean.bin"); - void* batch_normalization_47_mean = readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_variance_path = dir_prefix + std::string("batch_normalization_47_variance.bin"); - void* batch_normalization_47_variance = readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_48_w_path = dir_prefix + std::string("conv2d_48_w.bin"); - void* conv2d_48_w = readTrainedWeights(conv2d_48_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_48_b_path = dir_prefix + std::string("conv2d_48_b.bin"); - void* conv2d_48_b = readTrainedWeights(conv2d_48_b_path.c_str(), 
0,1,512,1,1); - std::string batch_normalization_48_gamma_path = dir_prefix + std::string("batch_normalization_48_gamma.bin"); - void* batch_normalization_48_gamma = readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_beta_path = dir_prefix + std::string("batch_normalization_48_beta.bin"); - void* batch_normalization_48_beta = readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_mean_path = dir_prefix + std::string("batch_normalization_48_mean.bin"); - void* batch_normalization_48_mean = readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_variance_path = dir_prefix + std::string("batch_normalization_48_variance.bin"); - void* batch_normalization_48_variance = readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_49_w_path = dir_prefix + std::string("conv2d_49_w.bin"); - void* conv2d_49_w = readTrainedWeights(conv2d_49_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_49_b_path = dir_prefix + std::string("conv2d_49_b.bin"); - void* conv2d_49_b = readTrainedWeights(conv2d_49_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_gamma_path = dir_prefix + std::string("batch_normalization_49_gamma.bin"); - void* batch_normalization_49_gamma = readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_beta_path = dir_prefix + std::string("batch_normalization_49_beta.bin"); - void* batch_normalization_49_beta = readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_mean_path = dir_prefix + std::string("batch_normalization_49_mean.bin"); - void* batch_normalization_49_mean = readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_variance_path = dir_prefix + 
std::string("batch_normalization_49_variance.bin"); - void* batch_normalization_49_variance = readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_50_w_path = dir_prefix + std::string("conv2d_50_w.bin"); - void* conv2d_50_w = readTrainedWeights(conv2d_50_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_50_b_path = dir_prefix + std::string("conv2d_50_b.bin"); - void* conv2d_50_b = readTrainedWeights(conv2d_50_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_gamma_path = dir_prefix + std::string("batch_normalization_50_gamma.bin"); - void* batch_normalization_50_gamma = readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_beta_path = dir_prefix + std::string("batch_normalization_50_beta.bin"); - void* batch_normalization_50_beta = readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_mean_path = dir_prefix + std::string("batch_normalization_50_mean.bin"); - void* batch_normalization_50_mean = readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_variance_path = dir_prefix + std::string("batch_normalization_50_variance.bin"); - void* batch_normalization_50_variance = readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_51_w_path = dir_prefix + std::string("conv2d_51_w.bin"); - void* conv2d_51_w = readTrainedWeights(conv2d_51_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_51_b_path = dir_prefix + std::string("conv2d_51_b.bin"); - void* conv2d_51_b = readTrainedWeights(conv2d_51_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_gamma_path = dir_prefix + std::string("batch_normalization_51_gamma.bin"); - void* batch_normalization_51_gamma = readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,512,1,1); - std::string 
batch_normalization_51_beta_path = dir_prefix + std::string("batch_normalization_51_beta.bin"); - void* batch_normalization_51_beta = readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_mean_path = dir_prefix + std::string("batch_normalization_51_mean.bin"); - void* batch_normalization_51_mean = readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_variance_path = dir_prefix + std::string("batch_normalization_51_variance.bin"); - void* batch_normalization_51_variance = readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_52_w_path = dir_prefix + std::string("conv2d_52_w.bin"); - void* conv2d_52_w = readTrainedWeights(conv2d_52_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_52_b_path = dir_prefix + std::string("conv2d_52_b.bin"); - void* conv2d_52_b = readTrainedWeights(conv2d_52_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_gamma_path = dir_prefix + std::string("batch_normalization_52_gamma.bin"); - void* batch_normalization_52_gamma = readTrainedWeights(batch_normalization_52_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_beta_path = dir_prefix + std::string("batch_normalization_52_beta.bin"); - void* batch_normalization_52_beta = readTrainedWeights(batch_normalization_52_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_mean_path = dir_prefix + std::string("batch_normalization_52_mean.bin"); - void* batch_normalization_52_mean = readTrainedWeights(batch_normalization_52_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_variance_path = dir_prefix + std::string("batch_normalization_52_variance.bin"); - void* batch_normalization_52_variance = readTrainedWeights(batch_normalization_52_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_53_w_path = dir_prefix + std::string("conv2d_53_w.bin"); - void* 
conv2d_53_w = readTrainedWeights(conv2d_53_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_53_b_path = dir_prefix + std::string("conv2d_53_b.bin"); - void* conv2d_53_b = readTrainedWeights(conv2d_53_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_gamma_path = dir_prefix + std::string("batch_normalization_53_gamma.bin"); - void* batch_normalization_53_gamma = readTrainedWeights(batch_normalization_53_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_beta_path = dir_prefix + std::string("batch_normalization_53_beta.bin"); - void* batch_normalization_53_beta = readTrainedWeights(batch_normalization_53_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_mean_path = dir_prefix + std::string("batch_normalization_53_mean.bin"); - void* batch_normalization_53_mean = readTrainedWeights(batch_normalization_53_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_variance_path = dir_prefix + std::string("batch_normalization_53_variance.bin"); - void* batch_normalization_53_variance = readTrainedWeights(batch_normalization_53_variance_path.c_str(), 0,1,2048,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,1000); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1000,1,1); - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - // NOTE: The pooling stride is 3*3 while it should be 2*2 -- interface itself needs fixing -- fix this manually in this case - void* var_0 = ConvLayer_PROMISE2(input, -123.68, 151.061, conv2d_1_w, -0.574422012090683, 0.5646807488203113, conv2d_1_b, -0.004829655, 0.014784645, 3, 3, 2, 2, 0, 3, 2, 1, 0.0, 689.7822875976562, 9); - void* var_1 = tensorBatchNorm(var_0, 
batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = ConvLayer_PROMISE(var_1, -4.952117443084717, 12.02118032741582, conv2d_2_w, -0.5448235973715783, 0.2447893574833928, conv2d_2_b, -0.0001412337, 0.00017318528, 0, 0, 1, 1, -1, 0, -1, -9.212617980003357, 8.107657526016425, 9); - void* var_3 = tensorBatchNorm(var_2, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_4 = tensorRelu(var_3); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 5.801381123542811, conv2d_3_w, -0.18028786177933215, 0.21247629988193606, conv2d_3_b, -7.8663266e-05, 0.00018541634, 1, 1, 1, 1, -1, 0, -1, -6.834556140899658, 8.541351353645396, 9); - void* var_6 = tensorBatchNorm(var_5, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_7 = tensorRelu(var_6); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 9.866454153060971, conv2d_4_w, -0.2255178820490837, 0.2254851074665791, conv2d_4_b, -0.00017080337, 0.00021038808, 0, 0, 1, 1, -1, 0, -1, -3.595476400852203, 3.637018930196785, 9); - void* var_9 = tensorBatchNorm(var_8, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_10 = ConvLayer_PROMISE(var_1, -4.952117443084717, 12.02118032741582, conv2d_5_w, -0.43272915667295453, 0.29589187785983095, conv2d_5_b, -0.000107640364, 0.00013177324, 0, 0, 1, 1, -1, 0, -1, -7.581318395137787, 7.8835730876923265, 9); - void* var_11 = tensorBatchNorm(var_10, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_12 = tensorAdd(var_9, var_11); - void* var_13 = tensorRelu(var_12); - void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 5.885549548625953, conv2d_6_w, -0.17062100511789324, 0.1432653286457067, 
conv2d_6_b, -7.950033e-05, 0.000104833845, 0, 0, 1, 1, -1, 0, -1, -5.310503073692322, 3.8418860490322224, 9); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_17 = ConvLayer_PROMISE(var_16, 0.0, 4.006655237674757, conv2d_7_w, -0.15594010630249977, 0.15720265829563249, conv2d_7_b, -6.419372e-05, 6.503685e-05, 1, 1, 1, 1, -1, 0, -1, -3.4114532544612883, 3.075598966121696, 9); - void* var_18 = tensorBatchNorm(var_17, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_19 = tensorRelu(var_18); - void* var_20 = ConvLayer_PROMISE(var_19, 0.0, 4.186545849800112, conv2d_8_w, -0.1599232355505228, 0.17352246379853484, conv2d_8_b, -8.235522e-05, 0.000105946136, 0, 0, 1, 1, -1, 0, -1, -1.5299443051815034, 1.425760628223422, 9); - void* var_21 = tensorBatchNorm(var_20, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_22 = tensorAdd(var_21, var_13); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 6.36634494018557, conv2d_9_w, -0.14470596650242806, 0.14421831880510708, conv2d_9_b, -3.4270335e-05, 4.177745e-05, 0, 0, 1, 1, -1, 0, -1, -4.584994326114654, 3.8648653411866007, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 3.3001420612335437, conv2d_10_w, -0.12276832074671984, 0.12627632835507407, conv2d_10_b, -5.8183014e-05, 3.3546e-05, 1, 1, 1, 1, -1, 0, -1, -2.828902014493942, 3.0918669717311893, 9); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, 
batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 5.313344509124818, conv2d_11_w, -0.1685639199912548, 0.16309838759899448, conv2d_11_b, -5.3248757e-05, 5.70645e-05, 0, 0, 1, 1, -1, 0, -1, -1.838510752558708, 1.3678752244711045, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorAdd(var_31, var_23); - void* var_33 = tensorRelu(var_32); - void* var_34 = ConvLayer_PROMISE(var_33, 0.0, 6.605899341106429, conv2d_12_w, -0.149728477448225, 0.13948052291572155, conv2d_12_b, -2.5221272e-05, 3.551765e-05, 0, 0, 2, 2, -1, 0, -1, -5.011460402488709, 3.915426737308551, 9); - void* var_35 = tensorBatchNorm(var_34, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_36 = tensorRelu(var_35); - void* var_37 = ConvLayer_PROMISE(var_36, 0.0, 3.794741600990312, conv2d_13_w, -0.09761696971952916, 0.11394361693412249, conv2d_13_b, -3.715329e-05, 2.9298411e-05, 1, 1, 1, 1, -1, 0, -1, -5.206686987876893, 4.520638871669791, 9); - void* var_38 = tensorBatchNorm(var_37, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_39 = tensorRelu(var_38); - void* var_40 = ConvLayer_PROMISE(var_39, 0.0, 3.7149479997158603, conv2d_14_w, -0.14844063371419908, 0.14925702929496953, conv2d_14_b, -6.0864673e-05, 5.4444306e-05, 0, 0, 1, 1, -1, 0, -1, -1.5011818276643754, 1.40834725618366, 9); - void* var_41 = tensorBatchNorm(var_40, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_42 = ConvLayer_PROMISE(var_33, 0.0, 6.605899341106429, conv2d_15_w, -0.1642171936035156, 0.16866817833483497, 
conv2d_15_b, -2.4068044e-05, 2.5504653e-05, 0, 0, 2, 2, -1, 0, -1, -4.410076716423035, 4.014970501422923, 9); - void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_44 = tensorAdd(var_41, var_43); - void* var_45 = tensorRelu(var_44); - void* var_46 = ConvLayer_PROMISE(var_45, 0.0, 6.518892978191488, conv2d_16_w, -0.09702376063913107, 0.1054209597408773, conv2d_16_b, -1.47610735e-05, 1.7075112e-05, 0, 0, 1, 1, -1, 0, -1, -4.87446900844574, 3.7661991298198862, 9); - void* var_47 = tensorBatchNorm(var_46, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_48 = tensorRelu(var_47); - void* var_49 = ConvLayer_PROMISE(var_48, 0.0, 3.259194364786183, conv2d_17_w, -0.08665236312896013, 0.0898308474570517, conv2d_17_b, -3.9163042e-05, 4.2771928e-05, 1, 1, 1, 1, -1, 0, -1, -2.673636848211288, 2.3574042041302774, 9); - void* var_50 = tensorBatchNorm(var_49, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_51 = tensorRelu(var_50); - void* var_52 = ConvLayer_PROMISE(var_51, 0.0, 3.641261647939746, conv2d_18_w, -0.12198246002197266, 0.1347003544867095, conv2d_18_b, -5.3173797e-05, 4.8076203e-05, 0, 0, 1, 1, -1, 0, -1, -1.0623184064626694, 0.916913630664359, 9); - void* var_53 = tensorBatchNorm(var_52, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_54 = tensorAdd(var_53, var_45); - void* var_55 = tensorRelu(var_54); - void* var_56 = ConvLayer_PROMISE(var_55, 0.0, 6.852215012073557, conv2d_19_w, -0.1122598509863019, 0.1435348897427337, conv2d_19_b, -1.20778e-05, 2.599136e-05, 0, 0, 1, 1, -1, 0, -1, -6.0281127138137816, 6.227049376964593, 9); - void* var_57 = tensorBatchNorm(var_56, 
batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_58 = tensorRelu(var_57); - void* var_59 = ConvLayer_PROMISE(var_58, 0.0, 3.397107238292711, conv2d_20_w, -0.1049889962002635, 0.1349111200869117, conv2d_20_b, -2.7412994e-05, 3.9722e-05, 1, 1, 1, 1, -1, 0, -1, -4.057081372261047, 4.329259678363884, 9); - void* var_60 = tensorBatchNorm(var_59, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_61 = tensorRelu(var_60); - void* var_62 = ConvLayer_PROMISE(var_61, 0.0, 3.6484641625881262, conv2d_21_w, -0.1401274445652962, 0.12122062336653527, conv2d_21_b, -5.5854776e-05, 7.8164114e-05, 0, 0, 1, 1, -1, 0, -1, -1.626526164531708, 0.8401960272193048, 9); - void* var_63 = tensorBatchNorm(var_62, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_64 = tensorAdd(var_63, var_55); - void* var_65 = tensorRelu(var_64); - void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 6.820035747528095, conv2d_22_w, -0.16039140529930593, 0.18889211259782335, conv2d_22_b, -4.6078047e-05, 3.3613425e-05, 0, 0, 1, 1, -1, 0, -1, -4.6271090393066405, 4.527790556430912, 9); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = ConvLayer_PROMISE(var_68, 0.0, 4.432856665611537, conv2d_23_w, -0.11397356178611517, 0.10787127982825667, conv2d_23_b, -3.6726604e-05, 2.4220695e-05, 1, 1, 1, 1, -1, 0, -1, -3.697339488506317, 3.1427979104519426, 9); - void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_71 = tensorRelu(var_70); - void* var_72 = 
ConvLayer_PROMISE(var_71, 0.0, 4.711423307418915, conv2d_24_w, -0.11341997660696507, 0.1437816035747536, conv2d_24_b, -2.7102393e-05, 3.091236e-05, 0, 0, 1, 1, -1, 0, -1, -1.4133628906011582, 1.2987316379547167, 9); - void* var_73 = tensorBatchNorm(var_72, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_74 = tensorAdd(var_73, var_65); - void* var_75 = tensorRelu(var_74); - void* var_76 = ConvLayer_PROMISE(var_75, 0.0, 7.624651549339404, conv2d_25_w, -0.10495923960208893, 0.12068889104576047, conv2d_25_b, -1.0208429e-05, 1.1486276e-05, 0, 0, 2, 2, -1, 0, -1, -3.87531214427948, 3.676609352588745, 9); - void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_78 = tensorRelu(var_77); - void* var_79 = ConvLayer_PROMISE(var_78, 0.0, 4.044620439529737, conv2d_26_w, -0.07615160812437534, 0.07977425544709099, conv2d_26_b, -2.4272886e-05, 1.6434806e-05, 1, 1, 1, 1, -1, 0, -1, -6.102653044223786, 4.761939919948585, 9); - void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_81 = tensorRelu(var_80); - void* var_82 = ConvLayer_PROMISE(var_81, 0.0, 3.4468260111809705, conv2d_27_w, -0.11533496034890414, 0.10714908299595141, conv2d_27_b, -3.225456e-05, 4.8422902e-05, 0, 0, 1, 1, -1, 0, -1, -1.319659793496132, 1.0189965035915467, 9); - void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_84 = ConvLayer_PROMISE(var_75, 0.0, 7.624651549339404, conv2d_28_w, -0.0966497472524643, 0.10240990699082783, conv2d_28_b, -1.4815519e-05, 1.554276e-05, 0, 0, 2, 2, -1, 0, -1, -3.9412443549633025, 3.863056869030064, 9); - void* var_85 = 
tensorBatchNorm(var_84, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); - void* var_86 = tensorAdd(var_83, var_85); - void* var_87 = tensorRelu(var_86); - void* var_88 = ConvLayer_PROMISE(var_87, 0.0, 6.879177100658442, conv2d_29_w, -0.06468586190789938, 0.08113565444201333, conv2d_29_b, -7.4607115e-06, 6.926009e-06, 0, 0, 1, 1, -1, 0, -1, -7.112777866363525, 4.633408185959027, 9); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_91 = ConvLayer_PROMISE(var_90, 0.0, 3.2354076790810105, conv2d_30_w, -0.06493933162838221, 0.07104272978752861, conv2d_30_b, -1.9349398e-05, 2.0178473e-05, 1, 1, 1, 1, -1, 0, -1, -3.226332322359085, 2.5138739056587447, 9); - void* var_92 = tensorBatchNorm(var_91, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); - void* var_93 = tensorRelu(var_92); - void* var_94 = ConvLayer_PROMISE(var_93, 0.0, 3.003848925829006, conv2d_31_w, -0.0918996930718422, 0.08853508594632167, conv2d_31_b, -4.2279236e-05, 5.5378885e-05, 0, 0, 1, 1, -1, 0, -1, -0.9247466986179351, 0.572747143149404, 9); - void* var_95 = tensorBatchNorm(var_94, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); - void* var_96 = tensorAdd(var_95, var_87); - void* var_97 = tensorRelu(var_96); - void* var_98 = ConvLayer_PROMISE(var_97, 0.0, 6.566591289043519, conv2d_32_w, -0.07145480328053236, 0.09098157961666606, conv2d_32_b, -1.0478255e-05, 1.4408147e-05, 0, 0, 1, 1, -1, 0, -1, -4.183038790225982, 3.5941159300804166, 9); - void* var_99 = tensorBatchNorm(var_98, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 
0.001); - void* var_100 = tensorRelu(var_99); - void* var_101 = ConvLayer_PROMISE(var_100, 0.0, 3.0348211803436556, conv2d_33_w, -0.056237234909087414, 0.06478620118647821, conv2d_33_b, -2.2639133e-05, 2.6081116e-05, 1, 1, 1, 1, -1, 0, -1, -2.098393235206604, 1.706788736581844, 9); - void* var_102 = tensorBatchNorm(var_101, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); - void* var_103 = tensorRelu(var_102); - void* var_104 = ConvLayer_PROMISE(var_103, 0.0, 3.248518852949145, conv2d_34_w, -0.07141499005258084, 0.08281665176153225, conv2d_34_b, -3.221229e-05, 4.569047e-05, 0, 0, 1, 1, -1, 0, -1, -0.8273181943893433, 0.7378616912961369, 9); - void* var_105 = tensorBatchNorm(var_104, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); - void* var_106 = tensorAdd(var_105, var_97); - void* var_107 = tensorRelu(var_106); - void* var_108 = ConvLayer_PROMISE(var_107, 0.0, 6.7038991017341765, conv2d_35_w, -0.06838216692209244, 0.09303134681284767, conv2d_35_b, -1.047402e-05, 1.0168567e-05, 0, 0, 1, 1, -1, 0, -1, -4.168091129779816, 3.5077465448380494, 9); - void* var_109 = tensorBatchNorm(var_108, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); - void* var_110 = tensorRelu(var_109); - void* var_111 = ConvLayer_PROMISE(var_110, 0.0, 2.8976624414922814, conv2d_36_w, -0.05521866928786039, 0.06331418491154919, conv2d_36_b, -3.86494e-05, 2.5999781e-05, 1, 1, 1, 1, -1, 0, -1, -2.182177306175232, 2.0366714165211324, 9); - void* var_112 = tensorBatchNorm(var_111, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); - void* var_113 = tensorRelu(var_112); - void* var_114 = ConvLayer_PROMISE(var_113, 0.0, 3.1310220296382933, conv2d_37_w, -0.07256266868114472, 
0.08391195811331292, conv2d_37_b, -4.8211587e-05, 4.7546604e-05, 0, 0, 1, 1, -1, 0, -1, -1.1372777166366577, 0.5528145518899268, 9); - void* var_115 = tensorBatchNorm(var_114, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 0.001); - void* var_116 = tensorAdd(var_115, var_107); - void* var_117 = tensorRelu(var_116); - void* var_118 = ConvLayer_PROMISE(var_117, 0.0, 6.625923678875129, conv2d_38_w, -0.06549047549813986, 0.10113389839232205, conv2d_38_b, -1.2351429e-05, 9.263066e-06, 0, 0, 1, 1, -1, 0, -1, -3.846879935503006, 3.639795066118241, 9); - void* var_119 = tensorBatchNorm(var_118, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); - void* var_120 = tensorRelu(var_119); - void* var_121 = ConvLayer_PROMISE(var_120, 0.0, 3.200671393632918, conv2d_39_w, -0.05184716333821415, 0.06296417640149599, conv2d_39_b, -2.4313656e-05, 3.812053e-05, 1, 1, 1, 1, -1, 0, -1, -1.9442583957910538, 1.5269825316667864, 9); - void* var_122 = tensorBatchNorm(var_121, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); - void* var_123 = tensorRelu(var_122); - void* var_124 = ConvLayer_PROMISE(var_123, 0.0, 4.040827783107826, conv2d_40_w, -0.0670140995979309, 0.0777734544128187, conv2d_40_b, -3.378767e-05, 2.5727571e-05, 0, 0, 1, 1, -1, 0, -1, -1.3243955926895141, 0.9261298480034093, 9); - void* var_125 = tensorBatchNorm(var_124, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); - void* var_126 = tensorAdd(var_125, var_117); - void* var_127 = tensorRelu(var_126); - void* var_128 = ConvLayer_PROMISE(var_127, 0.0, 6.8198375024796505, conv2d_41_w, -0.0710306192561984, 0.10828035335987954, conv2d_41_b, -1.3110192e-05, 1.5449377e-05, 0, 0, 1, 1, -1, 0, -1, -3.2434056091308596, 
5.530628140926378, 9); - void* var_129 = tensorBatchNorm(var_128, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); - void* var_130 = tensorRelu(var_129); - void* var_131 = ConvLayer_PROMISE(var_130, 0.0, 4.811174154282, conv2d_42_w, -0.056100725468248125, 0.06774817473441476, conv2d_42_b, -2.7899796e-05, 3.0695155e-05, 1, 1, 1, 1, -1, 0, -1, -3.553957043647766, 3.0058912243844595, 9); - void* var_132 = tensorBatchNorm(var_131, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); - void* var_133 = tensorRelu(var_132); - void* var_134 = ConvLayer_PROMISE(var_133, 0.0, 6.503577950477883, conv2d_43_w, -0.06820484285801648, 0.0836490480080298, conv2d_43_b, -2.2592936e-05, 2.3876093e-05, 0, 0, 1, 1, -1, 0, -1, -2.760284422159195, 1.1501846584081763, 9); - void* var_135 = tensorBatchNorm(var_134, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); - void* var_136 = tensorAdd(var_135, var_127); - void* var_137 = tensorRelu(var_136); - void* var_138 = ConvLayer_PROMISE(var_137, 0.0, 7.423539982796591, conv2d_44_w, -0.06768814034759998, 0.07900290366262253, conv2d_44_b, -1.0954906e-05, 1.2313803e-05, 0, 0, 2, 2, -1, 0, -1, -3.8250768241882325, 3.133637444972998, 9); - void* var_139 = tensorBatchNorm(var_138, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); - void* var_140 = tensorRelu(var_139); - void* var_141 = ConvLayer_PROMISE(var_140, 0.0, 3.234270730257073, conv2d_45_w, -0.04219715926796198, 0.04603923132643117, conv2d_45_b, -1.9525614e-05, 2.6300824e-05, 1, 1, 1, 1, -1, 0, -1, -3.2753402066230777, 1.8960905054807824, 9); - void* var_142 = tensorBatchNorm(var_141, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, 
batch_normalization_45_variance, 0.001); - void* var_143 = tensorRelu(var_142); - void* var_144 = ConvLayer_PROMISE(var_143, 0.0, 2.675833512783051, conv2d_46_w, -0.051137199997901915, 0.07428906522691328, conv2d_46_b, -2.6416203e-05, 3.079251e-05, 0, 0, 1, 1, -1, 0, -1, -0.6374539139270782, 0.6678488029241574, 9); - void* var_145 = tensorBatchNorm(var_144, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); - void* var_146 = ConvLayer_PROMISE(var_137, 0.0, 7.423539982796591, conv2d_47_w, -0.047168924897909165, 0.06949675244092963, conv2d_47_b, -1.2322937e-05, 2.1868867e-05, 0, 0, 2, 2, -1, 0, -1, -1.8896190267801285, 2.387520755291127, 9); - void* var_147 = tensorBatchNorm(var_146, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); - void* var_148 = tensorAdd(var_145, var_147); - void* var_149 = tensorRelu(var_148); - void* var_150 = ConvLayer_PROMISE(var_149, 0.0, 12.392736603737378, conv2d_48_w, -0.04417608780786395, 0.06200448917225007, conv2d_48_b, -6.6323187e-06, 7.1494946e-06, 0, 0, 1, 1, -1, 0, -1, -9.068103209495545, 5.912482521057253, 9); - void* var_151 = tensorBatchNorm(var_150, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); - void* var_152 = tensorRelu(var_151); - void* var_153 = ConvLayer_PROMISE(var_152, 0.0, 2.565971518278122, conv2d_49_w, -0.036550714168697596, 0.042889032773673605, conv2d_49_b, -3.1749918e-05, 3.1403273e-05, 1, 1, 1, 1, -1, 0, -1, -2.0715825698375703, 1.4426317431927056, 9); - void* var_154 = tensorBatchNorm(var_153, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); - void* var_155 = tensorRelu(var_154); - void* var_156 = ConvLayer_PROMISE(var_155, 0.0, 2.2121606218814973, conv2d_50_w, -0.04563436089083552, 
0.07235725801438761, conv2d_50_b, -5.138708e-05, 5.6959605e-05, 0, 0, 1, 1, -1, 0, -1, -0.5048498404622078, 0.4972966857850613, 9); - void* var_157 = tensorBatchNorm(var_156, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); - void* var_158 = tensorAdd(var_157, var_149); - void* var_159 = tensorRelu(var_158); - void* var_160 = ConvLayer_PROMISE(var_159, 0.0, 12.996321228027455, conv2d_51_w, -0.051894455961883065, 0.07700131461024579, conv2d_51_b, -8.893526e-06, 7.6235174e-06, 0, 0, 1, 1, -1, 0, -1, -7.534810958862305, 7.1688279371266015, 9); - void* var_161 = tensorBatchNorm(var_160, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); - void* var_162 = tensorRelu(var_161); - void* var_163 = ConvLayer_PROMISE(var_162, 0.0, 2.806837086677553, conv2d_52_w, -0.032556386385113004, 0.038920990321785316, conv2d_52_b, -3.1544037e-05, 4.5056524e-05, 1, 1, 1, 1, -1, 0, -1, -1.6795331789255141, 0.9551341712474886, 9); - void* var_164 = tensorBatchNorm(var_163, batch_normalization_52_gamma, batch_normalization_52_beta, batch_normalization_52_mean, batch_normalization_52_variance, 0.001); - void* var_165 = tensorRelu(var_164); - void* var_166 = ConvLayer_PROMISE(var_165, 0.0, 2.7935527668000724, conv2d_53_w, -0.04313115822151303, 0.0774340439587877, conv2d_53_b, -2.8713988e-05, 4.1641888e-05, 0, 0, 1, 1, -1, 0, -1, -0.5173906384706497, 0.5710835611820362, 9); - void* var_167 = tensorBatchNorm(var_166, batch_normalization_53_gamma, batch_normalization_53_beta, batch_normalization_53_mean, batch_normalization_53_variance, 0.001); - void* var_168 = tensorAdd(var_167, var_159); - void* var_169 = tensorRelu(var_168); - void* var_170 = tensorPooling(var_169,1,7,7,0,0,7,7); - void* var_171 = FCLayer_PROMISE(var_170, 0.0, 5.305631495475859, dense_1_w, -0.09220413094758988, 0.24919447432458666, dense_1_b, -0.024729362, 0.028545722, 
-1, -6.579668023586273, 7.794472872257277, 9); - void* var_172 = tensorSoftmax(var_171); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_172); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_piped.cc deleted file mode 100644 index 1765f133353e127cfff9b6b45ea482a9b6e678b5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_piped.cc +++ /dev/null @@ -1,212 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); - std::string input_path = dir_prefix + std::string("input.bin"); - 
std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 
0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* 
conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 8.190712, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 8.190712, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.023172, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 19.023172, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 14.428051, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 14.428051, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 
23.065294, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 23.065294, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 15.165984, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 15.165984, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 13.664733, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 13.664733, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.025272, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 19.025272, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 6.727217, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 6.727217, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.2003012, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 3.2003012, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.321189, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 4.321189, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.936297, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 2.936297, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 4.87262, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.87262, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 10.32133, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 10.32133, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 13.91, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 13.91, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -70.45701, 87.34367, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = 
computeAccuracy2(labels, batch_size, var_15, 100); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end); - } - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc deleted file mode 100644 index 798bc8a1d761a0beca029c6ca1d8f6c543739ab3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc +++ /dev/null @@ -1,207 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 1000; - int offset = 5000; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - 
bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - llvm_hpvm_initTensorRt(0); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - 
void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = 
dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 8.190712, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 8.190712, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 
1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.023172, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 19.023172, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 14.428051, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 14.428051, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 23.065294, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 23.065294, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 15.165984, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 15.165984, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 13.664733, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 13.664733, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.025272, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 19.025272, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 6.727217, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 6.727217, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.2003012, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 3.2003012, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.321189, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 4.321189, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.936297, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 2.936297, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 4.87262, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.87262, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 10.32133, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 
10.32133, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 13.91, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 13.91, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -70.45701, 87.34367, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end); - } - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_top5_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_top5_promise.cc deleted file mode 100644 index 7911c645679f31171e1c1f87facc1c1f82640adc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_top5_promise.cc +++ /dev/null @@ -1,137 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(3); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - 
startMemTracking(); - - int test_input_size = 4000; - //int batch_size = 2500; - int batch_size = 4000; - int offset = 5000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = 
readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + 
std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, 
conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 
2.8148007798194876, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - // float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); - float accuracy = computeTop5Accuracy(labels, batch_size, var_15, 100); - - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_piped.cc deleted file mode 100644 index 19c802dc88bb9a140bf5022ee07ab55f408ac53f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_piped.cc +++ /dev/null @@ -1,214 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - 
predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + 
std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 
0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816367, 2.0934217, conv2d_1_w, -0.53275156, 0.49437004, conv2d_1_b, -0.6403629, 0.2490165, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3590874671936035, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.3590874671936035, conv2d_2_w, -0.2688396, 0.20639156, conv2d_2_b, -0.7745511, 0.82006615, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.521231179237361, 9); - void* var_2 = 
ConvLayer_PROMISE(var_1, 0.0, 2.521231179237361, conv2d_3_w, -0.16776876, 0.14878987, conv2d_3_b, -0.35283303, 0.5154362, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2011985784769053, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1.2011985784769053, conv2d_4_w, -0.088948585, 0.114222586, conv2d_4_b, -0.30250227, 0.36856708, 1, 1, 1, 1, 0, 2, 1, 0.0, 1.0359880930185312, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 1.0359880930185312, conv2d_5_w, -0.07739562, 0.10973293, conv2d_5_b, -0.15568458, 0.17634983, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.3004955950379369, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 0.3004955950379369, conv2d_6_w, -0.051649556, 0.05435231, conv2d_6_b, -0.07395447, 0.07996062, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.11490475405007583, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 0.11490475405007583, conv2d_7_w, -0.043513633, 0.07577866, conv2d_7_b, -0.06921874, 0.02660573, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.16232508487999475, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 0.16232508487999475, conv2d_8_w, -0.033842053, 0.045218028, conv2d_8_b, -0.022827804, 0.023845317, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.12424996573477909, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.12424996573477909, conv2d_9_w, -0.02211613, 0.032084666, conv2d_9_b, -0.02699063, 0.03773564, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.1746344865113496, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.1746344865113496, conv2d_10_w, -0.01979376, 0.034854397, conv2d_10_b, -0.036107242, 0.07056531, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.5751757621765137, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.5751757621765137, conv2d_11_w, -0.03452098, 0.046055835, conv2d_11_b, -0.051925894, 0.07039055, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7718751144409115, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.7718751144409115, conv2d_12_w, -0.025946895, 0.040090334, conv2d_12_b, -0.06049362, 0.12658806, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1728516906499844, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1728516906499844, conv2d_13_w, 
-0.021766115, 0.03315237, conv2d_13_b, -0.20705001, 0.117947325, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.0015769386291495, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.0015769386291495, dense_1_w, -0.042597745, 0.046707444, dense_1_b, -0.21937433, 0.2545502, 1, 0.0, 2.002361118793486, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.002361118793486, dense_2_w, -0.32550547, 0.30829763, dense_2_b, -1.1787822, 1.2378151, -1, -18.251470546722413, 24.17363445281988, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - // NOTE: Signal back to OpenTuner - signalPipeToOpenTuner(); - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc deleted file mode 100644 index 754429a3d5328ca011ffbca75cb5aa47273f3d69..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc +++ /dev/null @@ -1,208 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> 
-#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - -int main(int argc, char* argv[]){ - - int test_input_size = 5000; - int batch_size = 500; - int offset = 5000; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - bool shouldDumpClassConf = false; - float* classConfs; - int* predictedLabels; - if(argc > 7){ - shouldDumpClassConf = true; - classConfs = (float*) malloc(sizeof(float) * test_input_size); - predictedLabels = (int*) malloc(sizeof(int) * test_input_size); - } - - - - llvm_hpvm_initTensorRt(0); - - int missed = 0; - for (int i = 0 ; i < total_runs; i++){ - - if (missed >= to_skip){ - break; - } - - startMemTracking(); - - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string labels32_path = dir_prefix + std::string("labels32.bin"); - - for(int i = 0; i < batch_count; i++){ - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = 
readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + 
std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 
0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816367, 2.0934217, conv2d_1_w, -0.53275156, 0.49437004, conv2d_1_b, -0.6403629, 0.2490165, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3590874671936035, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.3590874671936035, conv2d_2_w, -0.2688396, 0.20639156, conv2d_2_b, -0.7745511, 0.82006615, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.521231179237361, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 2.521231179237361, conv2d_3_w, -0.16776876, 0.14878987, conv2d_3_b, -0.35283303, 0.5154362, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2011985784769053, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1.2011985784769053, conv2d_4_w, -0.088948585, 0.114222586, conv2d_4_b, -0.30250227, 0.36856708, 1, 1, 1, 1, 0, 2, 1, 0.0, 1.0359880930185312, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 1.0359880930185312, conv2d_5_w, -0.07739562, 0.10973293, conv2d_5_b, -0.15568458, 0.17634983, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.3004955950379369, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 0.3004955950379369, conv2d_6_w, -0.051649556, 0.05435231, conv2d_6_b, -0.07395447, 0.07996062, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.11490475405007583, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 0.11490475405007583, conv2d_7_w, -0.043513633, 0.07577866, conv2d_7_b, -0.06921874, 0.02660573, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.16232508487999475, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 0.16232508487999475, conv2d_8_w, -0.033842053, 0.045218028, conv2d_8_b, -0.022827804, 0.023845317, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.12424996573477909, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.12424996573477909, conv2d_9_w, -0.02211613, 0.032084666, conv2d_9_b, -0.02699063, 0.03773564, 
1, 1, 1, 1, -1, 0, 1, 0.0, 0.1746344865113496, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.1746344865113496, conv2d_10_w, -0.01979376, 0.034854397, conv2d_10_b, -0.036107242, 0.07056531, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.5751757621765137, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.5751757621765137, conv2d_11_w, -0.03452098, 0.046055835, conv2d_11_b, -0.051925894, 0.07039055, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7718751144409115, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.7718751144409115, conv2d_12_w, -0.025946895, 0.040090334, conv2d_12_b, -0.06049362, 0.12658806, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1728516906499844, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1728516906499844, conv2d_13_w, -0.021766115, 0.03315237, conv2d_13_b, -0.20705001, 0.117947325, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.0015769386291495, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.0015769386291495, dense_1_w, -0.042597745, 0.046707444, dense_1_b, -0.21937433, 0.2545502, 1, 0.0, 2.002361118793486, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.002361118793486, dense_2_w, -0.32550547, 0.30829763, dense_2_b, -1.1787822, 1.2378151, -1, -18.251470546722413, 24.17363445281988, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15); - final_accuracy += accuracy; - - - if(shouldDumpClassConf){ - int relative_start = start - offset; - int relative_end = end - offset; - copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end); - } - - - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - if (final_accuracy < bench_acc) - missed += 1; - - - if(shouldDumpClassConf){ - int labels_start = offset; - int labels_end = offset + test_input_size; - uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end); - 
dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size); - } - - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_piped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_piped.cc deleted file mode 100644 index 99ee36b6eb811a29935071adc08cddcaeb457736..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_piped.cc +++ /dev/null @@ -1,186 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 1; - int offset = 0; - - int test_input_size = 2000; - int batch_size = 20; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - - - std::string dir_prefix = std::string("/shared/hsharif3/vgg16_imagenet_1/"); - std::string input_path = dir_prefix + std::string("test_input_combined.bin"); - std::string labels_path = dir_prefix + std::string("test_labels_combined.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = 
dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - 
std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,25088,4096); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = 
readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); - std::string dense_3_w_path = dir_prefix + std::string("dense_3_w.bin"); - void* dense_3_w = readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); - std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); - void* dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); - - - for (int i = 0 ; i < total_runs; i++){ - - // NOTE: Wait on signal from OpenTuner - stallOnOpenTunerSignal(); - - startMemTracking(); - - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - void* var_0 = ConvLayer_PROMISE(input, -123.68, 151.061, conv2d_1_w, -0.5682651399970055, 0.5677501424551024, conv2d_1_b, -0.015828926, 2.064037, 1, 1, 1, 1, -1, 0, 1, 0.0, 407.96143194580145, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 407.96143194580145, conv2d_2_w, -0.13156980648636818, 0.2164201746285022, conv2d_2_b, -1.0271513, 0.9052184, 1, 1, 1, 1, 0, 2, 1, 0.0, 1973.2054975586288, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1973.2054975586288, conv2d_3_w, -0.18644111251831055, 0.202149114727974, conv2d_3_b, -0.17922063, 0.36547425, 1, 1, 1, 1, -1, 0, 1, 0.0, 2386.9648486329534, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 2386.9648486329534, conv2d_4_w, -0.10804861642420292, 0.12427636455744764, conv2d_4_b, -0.59533477, 0.63375777, 1, 1, 1, 1, 0, 2, 1, 0.0, 4998.494643554761, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 4998.494643554761, conv2d_5_w, -0.08040237371623515, 0.09835810117424044, conv2d_5_b, -0.20097896, 0.34949613, 1, 1, 1, 1, -1, 0, 1, 
0.0, 4637.92161425807, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 4637.92161425807, conv2d_6_w, -0.05306418750435114, 0.06628044287860436, conv2d_6_b, -0.18124875, 0.274845, 1, 1, 1, 1, -1, 0, 1, 0.0, 4365.822572754019, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 4365.822572754019, conv2d_7_w, -0.05084674355760217, 0.07320860563218634, conv2d_7_b, -0.14288792, 0.59477174, 1, 1, 1, 1, 0, 2, 1, 0.0, 5600.749117676456, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 5600.749117676456, conv2d_8_w, -0.04523278899490833, 0.053042236261070186, conv2d_8_b, -0.14548235, 0.3148451, 1, 1, 1, 1, -1, 0, 1, 0.0, 3240.830364746551, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 3240.830364746551, conv2d_9_w, -0.02917514201253653, 0.03586270406842279, conv2d_9_b, -0.08428453, 0.18237582, 1, 1, 1, 1, -1, 0, 1, 0.0, 1895.9044943847766, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 1895.9044943847766, conv2d_10_w, -0.029496615380048753, 0.04047201693058028, conv2d_10_b, -0.19835947, 0.33766547, 1, 1, 1, 1, 0, 2, 1, 0.0, 1273.674801757832, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1273.674801757832, conv2d_11_w, -0.031951379626989365, 0.04218719156458998, conv2d_11_b, -0.3508028, 0.6397485, 1, 1, 1, 1, -1, 0, 1, 0.0, 652.76720800782, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 652.76720800782, conv2d_12_w, -0.028522676015272738, 0.03794213477522136, conv2d_12_b, -0.9171057, 0.7597668, 1, 1, 1, 1, -1, 0, 1, 0.0, 316.98977236938646, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 316.98977236938646, conv2d_13_w, -0.02821479567326605, 0.03854479804635069, conv2d_13_b, -0.50036746, 9.431553, 1, 1, 1, 1, 0, 2, 1, 0.0, 148.72470889282292, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 148.72470889282292, dense_1_w, -0.007091613108757884, 0.008147951829247227, dense_1_b, -0.78005254, 0.8555075, 1, 0.0, 40.64329356002882, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 40.64329356002882, dense_2_w, -0.012781758182682096, 0.01437051862943929, 
dense_2_b, -0.012339931, 1.2154555, 1, 0.0, 11.167800696373025, 9); - void* var_15 = FCLayer_PROMISE(var_14, 0.0, 11.167800696373025, dense_3_w, -0.02119149128906429, 0.02715564412623694, dense_3_b, -0.773357, 0.6615543, -1, -7.4482048592567445, 17.882177452087543, 9); - void* var_16 = tensorSoftmax(var_15); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_16); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - // NOTE: Signal back to OpenTuner - - signalPipeToOpenTuner(); - - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_promise.cc deleted file mode 100644 index 69d47078f30e62c4dc2d225dd1e1a2acd4da0c6a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_imagenet_promise.cc +++ /dev/null @@ -1,179 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - - -int total_runs = 1; -float bench_acc = 0; -int to_skip = 5; - - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - int offset = 0; - - int test_input_size = 2000; - int batch_size = 20; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - if (argc > 2){ - bench_acc = atof(argv[2]); - } - - if(argc > 3){ - to_skip = atoi(argv[3]); - } - - if(argc > 4){ - test_input_size = atoi(argv[4]); - } - - if(argc > 5){ - offset = atoi(argv[5]); - } - - if(argc > 6){ - batch_size = atoi(argv[6]); - } - - - - - std::string 
dir_prefix = std::string("/shared/hsharif3/vgg16_imagenet_1/"); - std::string input_path = dir_prefix + std::string("test_input_combined.bin"); - std::string labels_path = dir_prefix + std::string("test_labels_combined.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + 
std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 
0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,25088,4096); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); - std::string dense_3_w_path = dir_prefix + std::string("dense_3_w.bin"); - void* dense_3_w = readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); - std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); - void* dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); - - - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - void* var_0 = ConvLayer_PROMISE(input, -123.68, 151.061, conv2d_1_w, -0.5682651399970055, 0.5677501424551024, conv2d_1_b, -0.015828926, 2.064037, 1, 1, 1, 1, -1, 0, 1, 0.0, 407.96143194580145, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 407.96143194580145, conv2d_2_w, -0.13156980648636818, 0.2164201746285022, conv2d_2_b, -1.0271513, 0.9052184, 1, 1, 1, 1, 0, 2, 1, 0.0, 1973.2054975586288, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1973.2054975586288, conv2d_3_w, 
-0.18644111251831055, 0.202149114727974, conv2d_3_b, -0.17922063, 0.36547425, 1, 1, 1, 1, -1, 0, 1, 0.0, 2386.9648486329534, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 2386.9648486329534, conv2d_4_w, -0.10804861642420292, 0.12427636455744764, conv2d_4_b, -0.59533477, 0.63375777, 1, 1, 1, 1, 0, 2, 1, 0.0, 4998.494643554761, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 4998.494643554761, conv2d_5_w, -0.08040237371623515, 0.09835810117424044, conv2d_5_b, -0.20097896, 0.34949613, 1, 1, 1, 1, -1, 0, 1, 0.0, 4637.92161425807, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 4637.92161425807, conv2d_6_w, -0.05306418750435114, 0.06628044287860436, conv2d_6_b, -0.18124875, 0.274845, 1, 1, 1, 1, -1, 0, 1, 0.0, 4365.822572754019, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 4365.822572754019, conv2d_7_w, -0.05084674355760217, 0.07320860563218634, conv2d_7_b, -0.14288792, 0.59477174, 1, 1, 1, 1, 0, 2, 1, 0.0, 5600.749117676456, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 5600.749117676456, conv2d_8_w, -0.04523278899490833, 0.053042236261070186, conv2d_8_b, -0.14548235, 0.3148451, 1, 1, 1, 1, -1, 0, 1, 0.0, 3240.830364746551, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 3240.830364746551, conv2d_9_w, -0.02917514201253653, 0.03586270406842279, conv2d_9_b, -0.08428453, 0.18237582, 1, 1, 1, 1, -1, 0, 1, 0.0, 1895.9044943847766, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 1895.9044943847766, conv2d_10_w, -0.029496615380048753, 0.04047201693058028, conv2d_10_b, -0.19835947, 0.33766547, 1, 1, 1, 1, 0, 2, 1, 0.0, 1273.674801757832, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1273.674801757832, conv2d_11_w, -0.031951379626989365, 0.04218719156458998, conv2d_11_b, -0.3508028, 0.6397485, 1, 1, 1, 1, -1, 0, 1, 0.0, 652.76720800782, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 652.76720800782, conv2d_12_w, -0.028522676015272738, 0.03794213477522136, conv2d_12_b, -0.9171057, 0.7597668, 1, 1, 1, 1, -1, 0, 1, 0.0, 316.98977236938646, 9); - 
void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 316.98977236938646, conv2d_13_w, -0.02821479567326605, 0.03854479804635069, conv2d_13_b, -0.50036746, 9.431553, 1, 1, 1, 1, 0, 2, 1, 0.0, 148.72470889282292, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 148.72470889282292, dense_1_w, -0.007091613108757884, 0.008147951829247227, dense_1_b, -0.78005254, 0.8555075, 1, 0.0, 40.64329356002882, 9); - - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 40.64329356002882, dense_2_w, -0.012781758182682096, 0.01437051862943929, dense_2_b, -0.012339931, 1.2154555, 1, 0.0, 11.167800696373025, 9); - void* var_15 = FCLayer_PROMISE(var_14, 0.0, 11.167800696373025, dense_3_w, -0.02119149128906429, 0.02715564412623694, dense_3_b, -0.773357, 0.6615543, -1, -7.4482048592567445, 17.882177452087543, 9); - void* var_16 = tensorSoftmax(var_15); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_16); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - } - - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100_5.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100_5.cc deleted file mode 100644 index 3ee273d70aea6d74cfa55f250e999b05506f9b21..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/vgg16_cifar100_5.cc +++ /dev/null @@ -1,167 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - //std::string input_path = dir_prefix + 
std::string("vgg16_cifar100_calib.bin"); - //std::string labels_path = dir_prefix + std::string("vgg16_cifar100_train_labels.bin"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string 
conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = 
readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, 
conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - uint8_t* labels = 
readLabelsBatch(labels_path.c_str(),start,end); - - //float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); - float accuracy = computeTop5Accuracy(labels, batch_size, var_60, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/global_knobs.txt b/hpvm/projects/hpvm-tensor-rt/global_knobs.txt deleted file mode 120000 index 3c40f2450d933e4e5680f61542004d3ccfc06778..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/global_knobs.txt +++ /dev/null @@ -1 +0,0 @@ -autotuner/data/global_knobs.txt \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/global_knobs.txt b/hpvm/projects/hpvm-tensor-rt/global_knobs.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee2cd80cb6e33da5e97ffe2e842644d7a705cdff --- /dev/null +++ b/hpvm/projects/hpvm-tensor-rt/global_knobs.txt @@ -0,0 +1,69 @@ +fp32,11 -1 1.0 tensorConvolution tensorConvApprox dev conv_fc_red +fp16,12 -1 1.5 tensorConvolution tensorConvApproxHalf2 install conv_fc_red +perf,121 1,2,0 2.0 tensorConvolution tensorConvApprox dev conv +perf,122 1,2,1 2.0 tensorConvolution tensorConvApprox dev conv +perf,123 2,1,0 2.0 tensorConvolution tensorConvApprox dev conv +perf,124 2,1,1 2.0 tensorConvolution tensorConvApprox dev conv +perf,125 1,3,0 1.5 tensorConvolution tensorConvApprox dev conv +perf,126 1,3,1 1.5 tensorConvolution tensorConvApprox dev conv +perf,127 1,3,2 1.5 tensorConvolution tensorConvApprox dev conv +perf,128 3,1,0 1.5 tensorConvolution tensorConvApprox dev conv +perf,129 3,1,1 1.5 tensorConvolution tensorConvApprox dev conv +perf,130 3,1,2 1.5 tensorConvolution tensorConvApprox dev conv +perf,131 1,4,0 1.33 tensorConvolution tensorConvApprox dev conv +perf,132 1,4,1 1.33 tensorConvolution tensorConvApprox dev conv +perf,133 1,4,2 1.33 
tensorConvolution tensorConvApprox dev conv +perf,134 1,4,3 1.33 tensorConvolution tensorConvApprox dev conv +perf,135 4,1,0 1.33 tensorConvolution tensorConvApprox dev conv +perf,136 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv +perf,137 4,1,2 1.33 tensorConvolution tensorConvApprox dev conv +perf,138 4,1,3 1.33 tensorConvolution tensorConvApprox dev conv +perf_fp16,151 1,2,0 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,152 1,2,1 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,153 2,1,0 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,154 2,1,1 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,155 1,3,0 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,156 1,3,1 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,157 1,3,2 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,158 3,1,0 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,159 3,1,1 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,160 3,1,2 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,161 1,4,0 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,162 1,4,1 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,163 1,4,2 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,164 1,4,3 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,165 4,1,0 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,166 4,1,1 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,167 4,1,2 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,168 4,1,3 2.0 tensorConvolution tensorConvApprox install conv +samp,231 2,0,1 2.0 tensorConvolution tensorConvApprox dev conv +samp,232 2,1,1 2.0 tensorConvolution tensorConvApprox dev conv +samp,233 3,0,1 1.5 tensorConvolution tensorConvApprox dev conv +samp,234 3,1,1 1.5 tensorConvolution tensorConvApprox dev conv +samp,235 3,2,1 1.5 tensorConvolution tensorConvApprox 
dev conv +samp,236 4,0,1 1.33 tensorConvolution tensorConvApprox dev conv +samp,237 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv +samp,238 4,2,1 1.33 tensorConvolution tensorConvApprox dev conv +samp,239 4,3,1 1.33 tensorConvolution tensorConvApprox dev conv +samp_fp16,261 2,0,1 3.0 tensorConvolution tensorConvApprox install conv +samp_fp16,262 2,1,1 3.0 tensorConvolution tensorConvApprox install conv +samp_fp16,263 3,0,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,264 3,1,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,265 3,2,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,266 4,0,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,267 4,1,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,268 4,2,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,269 4,3,1 2.0 tensorConvolution tensorConvApprox install conv +red_samp,41 1 1.5 tensorReduction tensorReduction dev red +red_samp,42 1 2.25 tensorReduction tensorReduction dev red +red_samp,43 1 1.4 tensorReduction tensorReduction dev red +red_samp,44 1 2 tensorReduction tensorReduction dev red +red_samp,45 1 1.25 tensorReduction tensorReduction dev red +red_samp,46 1 1.8 tensorReduction tensorReduction dev red +swing_level,1 1 12 tensorConvolution tensorConvApprox install conv_fc +swing_level,2 1 10 tensorConvolution tensorConvApprox install conv_fc +swing_level,3 1 9 tensorConvolution tensorConvApprox install conv_fc +swing_level,4 1 8 tensorConvolution tensorConvApprox install conv_fc +swing_level,5 1 6 tensorConvolution tensorConvApprox install conv_fc +swing_level,6 1 5 tensorConvolution tensorConvApprox install conv_fc +swing_level,7 1 4 tensorConvolution tensorConvApprox install conv_fc diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1.bin deleted file mode 100644 index 
89ab6ad37cac94360f7f87c93676f353829f1deb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1_bias.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1_bias.bin deleted file mode 100644 index 0a2a381337e13fe52959c838b4a2bedab3c3f8ab..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv1_bias.bin +++ /dev/null @@ -1 +0,0 @@ -h4Q;¤ù;34¼j0_½G½–h;ìz/½ðÇÊ:àk¥¼{l½t+O;u¼8™¨¼d»”½®¼}8›<íO’¼äÕ¿»¤#½„ö¼”u<¼¿l…¼f¢;Ð4½ŠO ½>Øž¼7K¼04½ÎG:à'½ÔOF½M=; \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2.bin deleted file mode 100644 index 6cd00b88c5be6e212f2d3a37c8ea2a8edb1ceca7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2_bias.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2_bias.bin deleted file mode 100644 index c0adf3e885ce855a0cc9d1b4b12f73665187159e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/conv2_bias.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1.bin deleted file mode 100644 index 152c5bb0baae480f6b8d317889fc68f8d77247b6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1_bias.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1_bias.bin deleted file mode 100644 index 
58221f45cdc56049b2edc29c244ea9d797a87fb5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc1_bias.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2.bin deleted file mode 100644 index 97d78a9610b15be285661c1d762026c9fa4100cb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2_bias.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2_bias.bin deleted file mode 100644 index cbda59beef150dfbca756621286f042ec8e247bf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/fc2_bias.bin +++ /dev/null @@ -1 +0,0 @@ -Ê%”½ùb½Ó„g½W•½$VĽéum½'Ƶ½J§’½·¾¶½›¢½ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/input.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/input.bin deleted file mode 100644 index 4d2423f74188cfe0364185ccb66837785ccf4c4e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels.bin deleted file mode 100644 index 5e1f3881897f4729d6d90ff208a08ccdabb8fe7c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels32.bin deleted file mode 100644 index 6f1d7576cd18621a2cf646d0dd835846623589e5..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt b/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt deleted file mode 100644 index af4d13d6f8e6b5902ff743b07ef6875d644df91a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt +++ /dev/null @@ -1,4 +0,0 @@ -0 1 -1 1 -1 1 -1 1 --1 1 -1 1 -1 1 -1 1 --1 1 -1 1 -1 1 -1 1 --1 1 -1 1 -1 1 -1 1 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layer_composition.txt# b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layer_composition.txt# deleted file mode 100644 index 10692997a90e4490a91ad3d0e6e04285754144fd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layer_composition.txt# +++ /dev/null @@ -1,83 +0,0 @@ -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation -pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layers.txt# b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layers.txt# deleted file mode 100644 index 0bd2b554374c10d748a652f52e5427c716be0084..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/#layers.txt# +++ /dev/null @@ -1,83 +0,0 @@ -Conv1,10000,3,32,32,32,3,3,3 -#tensorBatchNorm1 -#tensorRelu1 -#tensorDepthwiseConv1 -#tensorBatchNorm2 -#tensorRelu2 -Conv2,10000,32,32,32,64,32,1,1 -#tensorBatchNorm3 -#tensorRelu3 
-#tensorDepthwiseConv2 -#tensorBatchNorm4 -#tensorRelu4 -Conv3,10000,64,16,16,128,64,1,1 -#tensorBatchNorm5 -#tensorRelu5 -#tensorDepthwiseConv3 -#tensorBatchNorm6 -#tensorRelu6 -Conv4,10000,128,16,16,128,128,1,1 -#tensorBatchNorm7 -#tensorRelu7 -#tensorDepthwiseConv4 -#tensorBatchNorm8 -#tensorRelu8 -Conv5,10000,128,8,8,256,128,1,1 -#tensorBatchNorm9 -#tensorRelu9 -#tensorDepthwiseConv5 -#tensorBatchNorm10 -#tensorRelu10 -Conv6,10000,256,8,8,256,256,1,1 -#tensorBatchNorm11 -#tensorRelu11 -#tensorDepthwiseConv6 -#tensorBatchNorm12 -#tensorRelu12 -Conv7,10000,256,4,4,512,256,1,1 -#tensorBatchNorm13 -#tensorRelu13 -#tensorDepthwiseConv7 -#tensorBatchNorm14 -#tensorRelu14 -Conv8,10000,512,4,4,512,512,1,1 -#tensorBatchNorm15 -#tensorRelu15 -#tensorDepthwiseConv8 -#tensorBatchNorm16 -#tensorRelu16 -Conv9,10000,512,4,4,512,512,1,1 -#tensorBatchNorm17 -#tensorRelu17 -#tensorDepthwiseConv9 -#tensorBatchNorm18 -#tensorRelu18 -Conv10,10000,512,4,4,512,512,1,1 -#tensorBatchNorm19 -#tensorRelu19 -#tensorDepthwiseConv10 -#tensorBatchNorm20 -#tensorRelu20 -Conv11,10000,512,4,4,512,512,1,1 -#tensorBatchNorm21 -#tensorRelu21 -#tensorDepthwiseConv11 -#tensorBatchNorm22 -#tensorRelu22 -Conv12,10000,512,4,4,512,512,1,1 -#tensorBatchNorm23 -#tensorRelu23 -#tensorDepthwiseConv12 -#tensorBatchNorm24 -#tensorRelu24 -Conv13,10000,512,2,2,1024,512,1,1 -#tensorBatchNorm25 -#tensorRelu25 -#tensorDepthwiseConv13 -#tensorBatchNorm26 -#tensorRelu26 -Conv14,10000,1024,2,2,1024,1024,1,1 -#tensorBatchNorm27 -#tensorRelu27 -#tensorPooling1 -FC1,10000,1024,1024,10 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/approxhpvm_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/approxhpvm_src.cc deleted file mode 100644 index 5089eb912bcb5335c96c04f6d98f5d17ab761c72..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/approxhpvm_src.cc +++ /dev/null @@ -1,2400 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> 
-#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 32); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, 
size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 64); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 128); - __visc__return(2, r, (size_t) 0); -} - -void var_16_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_17_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_20_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 128); - __visc__return(2, r, (size_t) 0); -} - -void var_22_node(void* t1, 
size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_23_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_26_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 256); - __visc__return(2, r, (size_t) 0); -} - -void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_29_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_32_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_33_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 256); - __visc__return(2, r, (size_t) 0); -} - -void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_35_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_36_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_37_node(void* t1, size_t bytes_t1, 
void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_38_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_39_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_41_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_42_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_43_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_44_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_46_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_47_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_49_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_50_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_51_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_52_node(void* t1, 
size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_53_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_54_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_55_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_56_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_57_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_58_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_59_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_60_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_61_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_62_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_63_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_64_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_65_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_66_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_67_node(void* t1, size_t bytes_t1, 
void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_68_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_69_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 512); - __visc__return(2, r, (size_t) 0); -} - -void var_70_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_71_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_72_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_73_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_74_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_75_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 1024); - __visc__return(2, r, (size_t) 0); -} - -void var_76_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_77_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_78_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_79_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_80_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_81_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_avg(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_82_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - 
__visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_83_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_84_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* batch_normalization_1_gamma, size_t batch_normalization_1_gamma_bytes, - void* batch_normalization_1_beta, size_t batch_normalization_1_beta_bytes, - void* batch_normalization_1_mean, size_t batch_normalization_1_mean_bytes, - void* batch_normalization_1_variance, size_t batch_normalization_1_variance_bytes, - void* depthwise_conv2d_1_w, size_t depthwise_conv2d_1_w_bytes, - void* batch_normalization_2_gamma, size_t batch_normalization_2_gamma_bytes, - void* batch_normalization_2_beta, size_t batch_normalization_2_beta_bytes, - void* batch_normalization_2_mean, size_t batch_normalization_2_mean_bytes, - void* batch_normalization_2_variance, size_t batch_normalization_2_variance_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* batch_normalization_3_gamma, size_t batch_normalization_3_gamma_bytes, - void* batch_normalization_3_beta, size_t batch_normalization_3_beta_bytes, - void* batch_normalization_3_mean, size_t batch_normalization_3_mean_bytes, - void* batch_normalization_3_variance, size_t batch_normalization_3_variance_bytes, - void* depthwise_conv2d_2_w, size_t depthwise_conv2d_2_w_bytes, - void* batch_normalization_4_gamma, size_t batch_normalization_4_gamma_bytes, - void* batch_normalization_4_beta, size_t batch_normalization_4_beta_bytes, - void* batch_normalization_4_mean, size_t 
batch_normalization_4_mean_bytes, - void* batch_normalization_4_variance, size_t batch_normalization_4_variance_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* batch_normalization_5_gamma, size_t batch_normalization_5_gamma_bytes, - void* batch_normalization_5_beta, size_t batch_normalization_5_beta_bytes, - void* batch_normalization_5_mean, size_t batch_normalization_5_mean_bytes, - void* batch_normalization_5_variance, size_t batch_normalization_5_variance_bytes, - void* depthwise_conv2d_3_w, size_t depthwise_conv2d_3_w_bytes, - void* batch_normalization_6_gamma, size_t batch_normalization_6_gamma_bytes, - void* batch_normalization_6_beta, size_t batch_normalization_6_beta_bytes, - void* batch_normalization_6_mean, size_t batch_normalization_6_mean_bytes, - void* batch_normalization_6_variance, size_t batch_normalization_6_variance_bytes, - void* conv2d_4_w, size_t conv2d_4_w_bytes, - void* batch_normalization_7_gamma, size_t batch_normalization_7_gamma_bytes, - void* batch_normalization_7_beta, size_t batch_normalization_7_beta_bytes, - void* batch_normalization_7_mean, size_t batch_normalization_7_mean_bytes, - void* batch_normalization_7_variance, size_t batch_normalization_7_variance_bytes, - void* depthwise_conv2d_4_w, size_t depthwise_conv2d_4_w_bytes, - void* batch_normalization_8_gamma, size_t batch_normalization_8_gamma_bytes, - void* batch_normalization_8_beta, size_t batch_normalization_8_beta_bytes, - void* batch_normalization_8_mean, size_t batch_normalization_8_mean_bytes, - void* batch_normalization_8_variance, size_t batch_normalization_8_variance_bytes, - void* conv2d_5_w, size_t conv2d_5_w_bytes, - void* batch_normalization_9_gamma, size_t batch_normalization_9_gamma_bytes, - void* batch_normalization_9_beta, size_t batch_normalization_9_beta_bytes, - void* batch_normalization_9_mean, size_t batch_normalization_9_mean_bytes, - void* batch_normalization_9_variance, size_t batch_normalization_9_variance_bytes, - void* 
depthwise_conv2d_5_w, size_t depthwise_conv2d_5_w_bytes, - void* batch_normalization_10_gamma, size_t batch_normalization_10_gamma_bytes, - void* batch_normalization_10_beta, size_t batch_normalization_10_beta_bytes, - void* batch_normalization_10_mean, size_t batch_normalization_10_mean_bytes, - void* batch_normalization_10_variance, size_t batch_normalization_10_variance_bytes, - void* conv2d_6_w, size_t conv2d_6_w_bytes, - void* batch_normalization_11_gamma, size_t batch_normalization_11_gamma_bytes, - void* batch_normalization_11_beta, size_t batch_normalization_11_beta_bytes, - void* batch_normalization_11_mean, size_t batch_normalization_11_mean_bytes, - void* batch_normalization_11_variance, size_t batch_normalization_11_variance_bytes, - void* depthwise_conv2d_6_w, size_t depthwise_conv2d_6_w_bytes, - void* batch_normalization_12_gamma, size_t batch_normalization_12_gamma_bytes, - void* batch_normalization_12_beta, size_t batch_normalization_12_beta_bytes, - void* batch_normalization_12_mean, size_t batch_normalization_12_mean_bytes, - void* batch_normalization_12_variance, size_t batch_normalization_12_variance_bytes, - void* conv2d_7_w, size_t conv2d_7_w_bytes, - void* batch_normalization_13_gamma, size_t batch_normalization_13_gamma_bytes, - void* batch_normalization_13_beta, size_t batch_normalization_13_beta_bytes, - void* batch_normalization_13_mean, size_t batch_normalization_13_mean_bytes, - void* batch_normalization_13_variance, size_t batch_normalization_13_variance_bytes, - void* depthwise_conv2d_7_w, size_t depthwise_conv2d_7_w_bytes, - void* batch_normalization_14_gamma, size_t batch_normalization_14_gamma_bytes, - void* batch_normalization_14_beta, size_t batch_normalization_14_beta_bytes, - void* batch_normalization_14_mean, size_t batch_normalization_14_mean_bytes, - void* batch_normalization_14_variance, size_t batch_normalization_14_variance_bytes, - void* conv2d_8_w, size_t conv2d_8_w_bytes, - void* batch_normalization_15_gamma, size_t 
batch_normalization_15_gamma_bytes, - void* batch_normalization_15_beta, size_t batch_normalization_15_beta_bytes, - void* batch_normalization_15_mean, size_t batch_normalization_15_mean_bytes, - void* batch_normalization_15_variance, size_t batch_normalization_15_variance_bytes, - void* depthwise_conv2d_8_w, size_t depthwise_conv2d_8_w_bytes, - void* batch_normalization_16_gamma, size_t batch_normalization_16_gamma_bytes, - void* batch_normalization_16_beta, size_t batch_normalization_16_beta_bytes, - void* batch_normalization_16_mean, size_t batch_normalization_16_mean_bytes, - void* batch_normalization_16_variance, size_t batch_normalization_16_variance_bytes, - void* conv2d_9_w, size_t conv2d_9_w_bytes, - void* batch_normalization_17_gamma, size_t batch_normalization_17_gamma_bytes, - void* batch_normalization_17_beta, size_t batch_normalization_17_beta_bytes, - void* batch_normalization_17_mean, size_t batch_normalization_17_mean_bytes, - void* batch_normalization_17_variance, size_t batch_normalization_17_variance_bytes, - void* depthwise_conv2d_9_w, size_t depthwise_conv2d_9_w_bytes, - void* batch_normalization_18_gamma, size_t batch_normalization_18_gamma_bytes, - void* batch_normalization_18_beta, size_t batch_normalization_18_beta_bytes, - void* batch_normalization_18_mean, size_t batch_normalization_18_mean_bytes, - void* batch_normalization_18_variance, size_t batch_normalization_18_variance_bytes, - void* conv2d_10_w, size_t conv2d_10_w_bytes, - void* batch_normalization_19_gamma, size_t batch_normalization_19_gamma_bytes, - void* batch_normalization_19_beta, size_t batch_normalization_19_beta_bytes, - void* batch_normalization_19_mean, size_t batch_normalization_19_mean_bytes, - void* batch_normalization_19_variance, size_t batch_normalization_19_variance_bytes, - void* depthwise_conv2d_10_w, size_t depthwise_conv2d_10_w_bytes, - void* batch_normalization_20_gamma, size_t batch_normalization_20_gamma_bytes, - void* batch_normalization_20_beta, size_t 
batch_normalization_20_beta_bytes, - void* batch_normalization_20_mean, size_t batch_normalization_20_mean_bytes, - void* batch_normalization_20_variance, size_t batch_normalization_20_variance_bytes, - void* conv2d_11_w, size_t conv2d_11_w_bytes, - void* batch_normalization_21_gamma, size_t batch_normalization_21_gamma_bytes, - void* batch_normalization_21_beta, size_t batch_normalization_21_beta_bytes, - void* batch_normalization_21_mean, size_t batch_normalization_21_mean_bytes, - void* batch_normalization_21_variance, size_t batch_normalization_21_variance_bytes, - void* depthwise_conv2d_11_w, size_t depthwise_conv2d_11_w_bytes, - void* batch_normalization_22_gamma, size_t batch_normalization_22_gamma_bytes, - void* batch_normalization_22_beta, size_t batch_normalization_22_beta_bytes, - void* batch_normalization_22_mean, size_t batch_normalization_22_mean_bytes, - void* batch_normalization_22_variance, size_t batch_normalization_22_variance_bytes, - void* conv2d_12_w, size_t conv2d_12_w_bytes, - void* batch_normalization_23_gamma, size_t batch_normalization_23_gamma_bytes, - void* batch_normalization_23_beta, size_t batch_normalization_23_beta_bytes, - void* batch_normalization_23_mean, size_t batch_normalization_23_mean_bytes, - void* batch_normalization_23_variance, size_t batch_normalization_23_variance_bytes, - void* depthwise_conv2d_12_w, size_t depthwise_conv2d_12_w_bytes, - void* batch_normalization_24_gamma, size_t batch_normalization_24_gamma_bytes, - void* batch_normalization_24_beta, size_t batch_normalization_24_beta_bytes, - void* batch_normalization_24_mean, size_t batch_normalization_24_mean_bytes, - void* batch_normalization_24_variance, size_t batch_normalization_24_variance_bytes, - void* conv2d_13_w, size_t conv2d_13_w_bytes, - void* batch_normalization_25_gamma, size_t batch_normalization_25_gamma_bytes, - void* batch_normalization_25_beta, size_t batch_normalization_25_beta_bytes, - void* batch_normalization_25_mean, size_t 
batch_normalization_25_mean_bytes, - void* batch_normalization_25_variance, size_t batch_normalization_25_variance_bytes, - void* depthwise_conv2d_13_w, size_t depthwise_conv2d_13_w_bytes, - void* batch_normalization_26_gamma, size_t batch_normalization_26_gamma_bytes, - void* batch_normalization_26_beta, size_t batch_normalization_26_beta_bytes, - void* batch_normalization_26_mean, size_t batch_normalization_26_mean_bytes, - void* batch_normalization_26_variance, size_t batch_normalization_26_variance_bytes, - void* conv2d_14_w, size_t conv2d_14_w_bytes, - void* batch_normalization_27_gamma, size_t batch_normalization_27_gamma_bytes, - void* batch_normalization_27_beta, size_t batch_normalization_27_beta_bytes, - void* batch_normalization_27_mean, size_t batch_normalization_27_mean_bytes, - void* batch_normalization_27_variance, size_t batch_normalization_27_variance_bytes, - void* dense_1_w, size_t dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(138, input, conv2d_1_w, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, depthwise_conv2d_1_w, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, conv2d_2_w, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, depthwise_conv2d_2_w, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, conv2d_3_w, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, depthwise_conv2d_3_w, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, conv2d_4_w, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 
depthwise_conv2d_4_w, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, conv2d_5_w, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, depthwise_conv2d_5_w, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, conv2d_6_w, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, depthwise_conv2d_6_w, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, conv2d_7_w, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, depthwise_conv2d_7_w, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, conv2d_8_w, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, depthwise_conv2d_8_w, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, conv2d_9_w, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, depthwise_conv2d_9_w, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, conv2d_10_w, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, depthwise_conv2d_10_w, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, conv2d_11_w, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, depthwise_conv2d_11_w, batch_normalization_22_gamma, 
batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, conv2d_12_w, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, depthwise_conv2d_12_w, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, conv2d_13_w, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, depthwise_conv2d_13_w, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, conv2d_14_w, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, dense_1_w, dense_1_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); - __visc__bindIn(var_1, 6, 4, 0); - __visc__bindIn(var_1, 7, 5, 0); - __visc__bindIn(var_1, 8, 6, 0); - __visc__bindIn(var_1, 9, 7, 0); - __visc__bindIn(var_1, 10, 8, 0); - __visc__bindIn(var_1, 11, 9, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - __visc__bindIn(var_3, 12, 2, 0); - __visc__bindIn(var_3, 13, 3, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 14, 2, 0); - __visc__bindIn(var_4, 15, 3, 0); - 
__visc__bindIn(var_4, 16, 4, 0); - __visc__bindIn(var_4, 17, 5, 0); - __visc__bindIn(var_4, 18, 6, 0); - __visc__bindIn(var_4, 19, 7, 0); - __visc__bindIn(var_4, 20, 8, 0); - __visc__bindIn(var_4, 21, 9, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - __visc__bindIn(var_6, 22, 2, 0); - __visc__bindIn(var_6, 23, 3, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - __visc__bindIn(var_7, 24, 2, 0); - __visc__bindIn(var_7, 25, 3, 0); - __visc__bindIn(var_7, 26, 4, 0); - __visc__bindIn(var_7, 27, 5, 0); - __visc__bindIn(var_7, 28, 6, 0); - __visc__bindIn(var_7, 29, 7, 0); - __visc__bindIn(var_7, 30, 8, 0); - __visc__bindIn(var_7, 31, 9, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - - void* var_9 = __visc__createNodeND(0, var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - __visc__bindIn(var_9, 32, 2, 0); - __visc__bindIn(var_9, 33, 3, 0); - - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - __visc__bindIn(var_10, 34, 2, 0); - __visc__bindIn(var_10, 35, 3, 0); - __visc__bindIn(var_10, 36, 4, 0); - __visc__bindIn(var_10, 37, 5, 0); - __visc__bindIn(var_10, 38, 6, 0); - __visc__bindIn(var_10, 39, 7, 0); - __visc__bindIn(var_10, 40, 8, 0); - __visc__bindIn(var_10, 41, 9, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, 
var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - __visc__bindIn(var_12, 42, 2, 0); - __visc__bindIn(var_12, 43, 3, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - __visc__bindIn(var_13, 44, 2, 0); - __visc__bindIn(var_13, 45, 3, 0); - __visc__bindIn(var_13, 46, 4, 0); - __visc__bindIn(var_13, 47, 5, 0); - __visc__bindIn(var_13, 48, 6, 0); - __visc__bindIn(var_13, 49, 7, 0); - __visc__bindIn(var_13, 50, 8, 0); - __visc__bindIn(var_13, 51, 9, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 52, 2, 0); - __visc__bindIn(var_15, 53, 3, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - __visc__bindIn(var_16, 54, 2, 0); - __visc__bindIn(var_16, 55, 3, 0); - __visc__bindIn(var_16, 56, 4, 0); - __visc__bindIn(var_16, 57, 5, 0); - __visc__bindIn(var_16, 58, 6, 0); - __visc__bindIn(var_16, 59, 7, 0); - __visc__bindIn(var_16, 60, 8, 0); - __visc__bindIn(var_16, 61, 9, 0); - - void* var_17 = __visc__createNodeND(0, var_17_node); - - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); - - void* var_18 = __visc__createNodeND(0, var_18_node); - - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 62, 2, 0); - __visc__bindIn(var_18, 63, 3, 0); - - void* var_19 = __visc__createNodeND(0, var_19_node); - - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - __visc__bindIn(var_19, 64, 2, 0); - __visc__bindIn(var_19, 65, 3, 0); - __visc__bindIn(var_19, 66, 4, 0); - 
__visc__bindIn(var_19, 67, 5, 0); - __visc__bindIn(var_19, 68, 6, 0); - __visc__bindIn(var_19, 69, 7, 0); - __visc__bindIn(var_19, 70, 8, 0); - __visc__bindIn(var_19, 71, 9, 0); - - void* var_20 = __visc__createNodeND(0, var_20_node); - - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); - - void* var_21 = __visc__createNodeND(0, var_21_node); - - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 72, 2, 0); - __visc__bindIn(var_21, 73, 3, 0); - - void* var_22 = __visc__createNodeND(0, var_22_node); - - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - __visc__bindIn(var_22, 74, 2, 0); - __visc__bindIn(var_22, 75, 3, 0); - __visc__bindIn(var_22, 76, 4, 0); - __visc__bindIn(var_22, 77, 5, 0); - __visc__bindIn(var_22, 78, 6, 0); - __visc__bindIn(var_22, 79, 7, 0); - __visc__bindIn(var_22, 80, 8, 0); - __visc__bindIn(var_22, 81, 9, 0); - - void* var_23 = __visc__createNodeND(0, var_23_node); - - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); - - void* var_24 = __visc__createNodeND(0, var_24_node); - - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - __visc__bindIn(var_24, 82, 2, 0); - __visc__bindIn(var_24, 83, 3, 0); - - void* var_25 = __visc__createNodeND(0, var_25_node); - - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 84, 2, 0); - __visc__bindIn(var_25, 85, 3, 0); - __visc__bindIn(var_25, 86, 4, 0); - __visc__bindIn(var_25, 87, 5, 0); - __visc__bindIn(var_25, 88, 6, 0); - __visc__bindIn(var_25, 89, 7, 0); - __visc__bindIn(var_25, 90, 8, 0); - __visc__bindIn(var_25, 91, 9, 0); - - void* var_26 = __visc__createNodeND(0, var_26_node); - - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); - - void* var_27 = __visc__createNodeND(0, var_27_node); - - 
__visc__edge(var_26, var_27, 1, 0, 0, 0); - __visc__edge(var_26, var_27, 1, 1, 1, 0); - __visc__bindIn(var_27, 92, 2, 0); - __visc__bindIn(var_27, 93, 3, 0); - - void* var_28 = __visc__createNodeND(0, var_28_node); - - __visc__edge(var_27, var_28, 1, 0, 0, 0); - __visc__edge(var_27, var_28, 1, 1, 1, 0); - __visc__bindIn(var_28, 94, 2, 0); - __visc__bindIn(var_28, 95, 3, 0); - __visc__bindIn(var_28, 96, 4, 0); - __visc__bindIn(var_28, 97, 5, 0); - __visc__bindIn(var_28, 98, 6, 0); - __visc__bindIn(var_28, 99, 7, 0); - __visc__bindIn(var_28, 100, 8, 0); - __visc__bindIn(var_28, 101, 9, 0); - - void* var_29 = __visc__createNodeND(0, var_29_node); - - __visc__edge(var_28, var_29, 1, 0, 0, 0); - __visc__edge(var_28, var_29, 1, 1, 1, 0); - - void* var_30 = __visc__createNodeND(0, var_30_node); - - __visc__edge(var_29, var_30, 1, 0, 0, 0); - __visc__edge(var_29, var_30, 1, 1, 1, 0); - __visc__bindIn(var_30, 102, 2, 0); - __visc__bindIn(var_30, 103, 3, 0); - - void* var_31 = __visc__createNodeND(0, var_31_node); - - __visc__edge(var_30, var_31, 1, 0, 0, 0); - __visc__edge(var_30, var_31, 1, 1, 1, 0); - __visc__bindIn(var_31, 104, 2, 0); - __visc__bindIn(var_31, 105, 3, 0); - __visc__bindIn(var_31, 106, 4, 0); - __visc__bindIn(var_31, 107, 5, 0); - __visc__bindIn(var_31, 108, 6, 0); - __visc__bindIn(var_31, 109, 7, 0); - __visc__bindIn(var_31, 110, 8, 0); - __visc__bindIn(var_31, 111, 9, 0); - - void* var_32 = __visc__createNodeND(0, var_32_node); - - __visc__edge(var_31, var_32, 1, 0, 0, 0); - __visc__edge(var_31, var_32, 1, 1, 1, 0); - - void* var_33 = __visc__createNodeND(0, var_33_node); - - __visc__edge(var_32, var_33, 1, 0, 0, 0); - __visc__edge(var_32, var_33, 1, 1, 1, 0); - __visc__bindIn(var_33, 112, 2, 0); - __visc__bindIn(var_33, 113, 3, 0); - - void* var_34 = __visc__createNodeND(0, var_34_node); - - __visc__edge(var_33, var_34, 1, 0, 0, 0); - __visc__edge(var_33, var_34, 1, 1, 1, 0); - __visc__bindIn(var_34, 114, 2, 0); - __visc__bindIn(var_34, 115, 3, 0); - 
__visc__bindIn(var_34, 116, 4, 0); - __visc__bindIn(var_34, 117, 5, 0); - __visc__bindIn(var_34, 118, 6, 0); - __visc__bindIn(var_34, 119, 7, 0); - __visc__bindIn(var_34, 120, 8, 0); - __visc__bindIn(var_34, 121, 9, 0); - - void* var_35 = __visc__createNodeND(0, var_35_node); - - __visc__edge(var_34, var_35, 1, 0, 0, 0); - __visc__edge(var_34, var_35, 1, 1, 1, 0); - - void* var_36 = __visc__createNodeND(0, var_36_node); - - __visc__edge(var_35, var_36, 1, 0, 0, 0); - __visc__edge(var_35, var_36, 1, 1, 1, 0); - __visc__bindIn(var_36, 122, 2, 0); - __visc__bindIn(var_36, 123, 3, 0); - - void* var_37 = __visc__createNodeND(0, var_37_node); - - __visc__edge(var_36, var_37, 1, 0, 0, 0); - __visc__edge(var_36, var_37, 1, 1, 1, 0); - __visc__bindIn(var_37, 124, 2, 0); - __visc__bindIn(var_37, 125, 3, 0); - __visc__bindIn(var_37, 126, 4, 0); - __visc__bindIn(var_37, 127, 5, 0); - __visc__bindIn(var_37, 128, 6, 0); - __visc__bindIn(var_37, 129, 7, 0); - __visc__bindIn(var_37, 130, 8, 0); - __visc__bindIn(var_37, 131, 9, 0); - - void* var_38 = __visc__createNodeND(0, var_38_node); - - __visc__edge(var_37, var_38, 1, 0, 0, 0); - __visc__edge(var_37, var_38, 1, 1, 1, 0); - - void* var_39 = __visc__createNodeND(0, var_39_node); - - __visc__edge(var_38, var_39, 1, 0, 0, 0); - __visc__edge(var_38, var_39, 1, 1, 1, 0); - __visc__bindIn(var_39, 132, 2, 0); - __visc__bindIn(var_39, 133, 3, 0); - - void* var_40 = __visc__createNodeND(0, var_40_node); - - __visc__edge(var_39, var_40, 1, 0, 0, 0); - __visc__edge(var_39, var_40, 1, 1, 1, 0); - __visc__bindIn(var_40, 134, 2, 0); - __visc__bindIn(var_40, 135, 3, 0); - __visc__bindIn(var_40, 136, 4, 0); - __visc__bindIn(var_40, 137, 5, 0); - __visc__bindIn(var_40, 138, 6, 0); - __visc__bindIn(var_40, 139, 7, 0); - __visc__bindIn(var_40, 140, 8, 0); - __visc__bindIn(var_40, 141, 9, 0); - - void* var_41 = __visc__createNodeND(0, var_41_node); - - __visc__edge(var_40, var_41, 1, 0, 0, 0); - __visc__edge(var_40, var_41, 1, 1, 1, 0); - - void* 
var_42 = __visc__createNodeND(0, var_42_node); - - __visc__edge(var_41, var_42, 1, 0, 0, 0); - __visc__edge(var_41, var_42, 1, 1, 1, 0); - __visc__bindIn(var_42, 142, 2, 0); - __visc__bindIn(var_42, 143, 3, 0); - - void* var_43 = __visc__createNodeND(0, var_43_node); - - __visc__edge(var_42, var_43, 1, 0, 0, 0); - __visc__edge(var_42, var_43, 1, 1, 1, 0); - __visc__bindIn(var_43, 144, 2, 0); - __visc__bindIn(var_43, 145, 3, 0); - __visc__bindIn(var_43, 146, 4, 0); - __visc__bindIn(var_43, 147, 5, 0); - __visc__bindIn(var_43, 148, 6, 0); - __visc__bindIn(var_43, 149, 7, 0); - __visc__bindIn(var_43, 150, 8, 0); - __visc__bindIn(var_43, 151, 9, 0); - - void* var_44 = __visc__createNodeND(0, var_44_node); - - __visc__edge(var_43, var_44, 1, 0, 0, 0); - __visc__edge(var_43, var_44, 1, 1, 1, 0); - - void* var_45 = __visc__createNodeND(0, var_45_node); - - __visc__edge(var_44, var_45, 1, 0, 0, 0); - __visc__edge(var_44, var_45, 1, 1, 1, 0); - __visc__bindIn(var_45, 152, 2, 0); - __visc__bindIn(var_45, 153, 3, 0); - - void* var_46 = __visc__createNodeND(0, var_46_node); - - __visc__edge(var_45, var_46, 1, 0, 0, 0); - __visc__edge(var_45, var_46, 1, 1, 1, 0); - __visc__bindIn(var_46, 154, 2, 0); - __visc__bindIn(var_46, 155, 3, 0); - __visc__bindIn(var_46, 156, 4, 0); - __visc__bindIn(var_46, 157, 5, 0); - __visc__bindIn(var_46, 158, 6, 0); - __visc__bindIn(var_46, 159, 7, 0); - __visc__bindIn(var_46, 160, 8, 0); - __visc__bindIn(var_46, 161, 9, 0); - - void* var_47 = __visc__createNodeND(0, var_47_node); - - __visc__edge(var_46, var_47, 1, 0, 0, 0); - __visc__edge(var_46, var_47, 1, 1, 1, 0); - - void* var_48 = __visc__createNodeND(0, var_48_node); - - __visc__edge(var_47, var_48, 1, 0, 0, 0); - __visc__edge(var_47, var_48, 1, 1, 1, 0); - __visc__bindIn(var_48, 162, 2, 0); - __visc__bindIn(var_48, 163, 3, 0); - - void* var_49 = __visc__createNodeND(0, var_49_node); - - __visc__edge(var_48, var_49, 1, 0, 0, 0); - __visc__edge(var_48, var_49, 1, 1, 1, 0); - 
__visc__bindIn(var_49, 164, 2, 0); - __visc__bindIn(var_49, 165, 3, 0); - __visc__bindIn(var_49, 166, 4, 0); - __visc__bindIn(var_49, 167, 5, 0); - __visc__bindIn(var_49, 168, 6, 0); - __visc__bindIn(var_49, 169, 7, 0); - __visc__bindIn(var_49, 170, 8, 0); - __visc__bindIn(var_49, 171, 9, 0); - - void* var_50 = __visc__createNodeND(0, var_50_node); - - __visc__edge(var_49, var_50, 1, 0, 0, 0); - __visc__edge(var_49, var_50, 1, 1, 1, 0); - - void* var_51 = __visc__createNodeND(0, var_51_node); - - __visc__edge(var_50, var_51, 1, 0, 0, 0); - __visc__edge(var_50, var_51, 1, 1, 1, 0); - __visc__bindIn(var_51, 172, 2, 0); - __visc__bindIn(var_51, 173, 3, 0); - - void* var_52 = __visc__createNodeND(0, var_52_node); - - __visc__edge(var_51, var_52, 1, 0, 0, 0); - __visc__edge(var_51, var_52, 1, 1, 1, 0); - __visc__bindIn(var_52, 174, 2, 0); - __visc__bindIn(var_52, 175, 3, 0); - __visc__bindIn(var_52, 176, 4, 0); - __visc__bindIn(var_52, 177, 5, 0); - __visc__bindIn(var_52, 178, 6, 0); - __visc__bindIn(var_52, 179, 7, 0); - __visc__bindIn(var_52, 180, 8, 0); - __visc__bindIn(var_52, 181, 9, 0); - - void* var_53 = __visc__createNodeND(0, var_53_node); - - __visc__edge(var_52, var_53, 1, 0, 0, 0); - __visc__edge(var_52, var_53, 1, 1, 1, 0); - - void* var_54 = __visc__createNodeND(0, var_54_node); - - __visc__edge(var_53, var_54, 1, 0, 0, 0); - __visc__edge(var_53, var_54, 1, 1, 1, 0); - __visc__bindIn(var_54, 182, 2, 0); - __visc__bindIn(var_54, 183, 3, 0); - - void* var_55 = __visc__createNodeND(0, var_55_node); - - __visc__edge(var_54, var_55, 1, 0, 0, 0); - __visc__edge(var_54, var_55, 1, 1, 1, 0); - __visc__bindIn(var_55, 184, 2, 0); - __visc__bindIn(var_55, 185, 3, 0); - __visc__bindIn(var_55, 186, 4, 0); - __visc__bindIn(var_55, 187, 5, 0); - __visc__bindIn(var_55, 188, 6, 0); - __visc__bindIn(var_55, 189, 7, 0); - __visc__bindIn(var_55, 190, 8, 0); - __visc__bindIn(var_55, 191, 9, 0); - - void* var_56 = __visc__createNodeND(0, var_56_node); - - __visc__edge(var_55, 
var_56, 1, 0, 0, 0); - __visc__edge(var_55, var_56, 1, 1, 1, 0); - - void* var_57 = __visc__createNodeND(0, var_57_node); - - __visc__edge(var_56, var_57, 1, 0, 0, 0); - __visc__edge(var_56, var_57, 1, 1, 1, 0); - __visc__bindIn(var_57, 192, 2, 0); - __visc__bindIn(var_57, 193, 3, 0); - - void* var_58 = __visc__createNodeND(0, var_58_node); - - __visc__edge(var_57, var_58, 1, 0, 0, 0); - __visc__edge(var_57, var_58, 1, 1, 1, 0); - __visc__bindIn(var_58, 194, 2, 0); - __visc__bindIn(var_58, 195, 3, 0); - __visc__bindIn(var_58, 196, 4, 0); - __visc__bindIn(var_58, 197, 5, 0); - __visc__bindIn(var_58, 198, 6, 0); - __visc__bindIn(var_58, 199, 7, 0); - __visc__bindIn(var_58, 200, 8, 0); - __visc__bindIn(var_58, 201, 9, 0); - - void* var_59 = __visc__createNodeND(0, var_59_node); - - __visc__edge(var_58, var_59, 1, 0, 0, 0); - __visc__edge(var_58, var_59, 1, 1, 1, 0); - - void* var_60 = __visc__createNodeND(0, var_60_node); - - __visc__edge(var_59, var_60, 1, 0, 0, 0); - __visc__edge(var_59, var_60, 1, 1, 1, 0); - __visc__bindIn(var_60, 202, 2, 0); - __visc__bindIn(var_60, 203, 3, 0); - - void* var_61 = __visc__createNodeND(0, var_61_node); - - __visc__edge(var_60, var_61, 1, 0, 0, 0); - __visc__edge(var_60, var_61, 1, 1, 1, 0); - __visc__bindIn(var_61, 204, 2, 0); - __visc__bindIn(var_61, 205, 3, 0); - __visc__bindIn(var_61, 206, 4, 0); - __visc__bindIn(var_61, 207, 5, 0); - __visc__bindIn(var_61, 208, 6, 0); - __visc__bindIn(var_61, 209, 7, 0); - __visc__bindIn(var_61, 210, 8, 0); - __visc__bindIn(var_61, 211, 9, 0); - - void* var_62 = __visc__createNodeND(0, var_62_node); - - __visc__edge(var_61, var_62, 1, 0, 0, 0); - __visc__edge(var_61, var_62, 1, 1, 1, 0); - - void* var_63 = __visc__createNodeND(0, var_63_node); - - __visc__edge(var_62, var_63, 1, 0, 0, 0); - __visc__edge(var_62, var_63, 1, 1, 1, 0); - __visc__bindIn(var_63, 212, 2, 0); - __visc__bindIn(var_63, 213, 3, 0); - - void* var_64 = __visc__createNodeND(0, var_64_node); - - __visc__edge(var_63, var_64, 
1, 0, 0, 0); - __visc__edge(var_63, var_64, 1, 1, 1, 0); - __visc__bindIn(var_64, 214, 2, 0); - __visc__bindIn(var_64, 215, 3, 0); - __visc__bindIn(var_64, 216, 4, 0); - __visc__bindIn(var_64, 217, 5, 0); - __visc__bindIn(var_64, 218, 6, 0); - __visc__bindIn(var_64, 219, 7, 0); - __visc__bindIn(var_64, 220, 8, 0); - __visc__bindIn(var_64, 221, 9, 0); - - void* var_65 = __visc__createNodeND(0, var_65_node); - - __visc__edge(var_64, var_65, 1, 0, 0, 0); - __visc__edge(var_64, var_65, 1, 1, 1, 0); - - void* var_66 = __visc__createNodeND(0, var_66_node); - - __visc__edge(var_65, var_66, 1, 0, 0, 0); - __visc__edge(var_65, var_66, 1, 1, 1, 0); - __visc__bindIn(var_66, 222, 2, 0); - __visc__bindIn(var_66, 223, 3, 0); - - void* var_67 = __visc__createNodeND(0, var_67_node); - - __visc__edge(var_66, var_67, 1, 0, 0, 0); - __visc__edge(var_66, var_67, 1, 1, 1, 0); - __visc__bindIn(var_67, 224, 2, 0); - __visc__bindIn(var_67, 225, 3, 0); - __visc__bindIn(var_67, 226, 4, 0); - __visc__bindIn(var_67, 227, 5, 0); - __visc__bindIn(var_67, 228, 6, 0); - __visc__bindIn(var_67, 229, 7, 0); - __visc__bindIn(var_67, 230, 8, 0); - __visc__bindIn(var_67, 231, 9, 0); - - void* var_68 = __visc__createNodeND(0, var_68_node); - - __visc__edge(var_67, var_68, 1, 0, 0, 0); - __visc__edge(var_67, var_68, 1, 1, 1, 0); - - void* var_69 = __visc__createNodeND(0, var_69_node); - - __visc__edge(var_68, var_69, 1, 0, 0, 0); - __visc__edge(var_68, var_69, 1, 1, 1, 0); - __visc__bindIn(var_69, 232, 2, 0); - __visc__bindIn(var_69, 233, 3, 0); - - void* var_70 = __visc__createNodeND(0, var_70_node); - - __visc__edge(var_69, var_70, 1, 0, 0, 0); - __visc__edge(var_69, var_70, 1, 1, 1, 0); - __visc__bindIn(var_70, 234, 2, 0); - __visc__bindIn(var_70, 235, 3, 0); - __visc__bindIn(var_70, 236, 4, 0); - __visc__bindIn(var_70, 237, 5, 0); - __visc__bindIn(var_70, 238, 6, 0); - __visc__bindIn(var_70, 239, 7, 0); - __visc__bindIn(var_70, 240, 8, 0); - __visc__bindIn(var_70, 241, 9, 0); - - void* var_71 = 
__visc__createNodeND(0, var_71_node); - - __visc__edge(var_70, var_71, 1, 0, 0, 0); - __visc__edge(var_70, var_71, 1, 1, 1, 0); - - void* var_72 = __visc__createNodeND(0, var_72_node); - - __visc__edge(var_71, var_72, 1, 0, 0, 0); - __visc__edge(var_71, var_72, 1, 1, 1, 0); - __visc__bindIn(var_72, 242, 2, 0); - __visc__bindIn(var_72, 243, 3, 0); - - void* var_73 = __visc__createNodeND(0, var_73_node); - - __visc__edge(var_72, var_73, 1, 0, 0, 0); - __visc__edge(var_72, var_73, 1, 1, 1, 0); - __visc__bindIn(var_73, 244, 2, 0); - __visc__bindIn(var_73, 245, 3, 0); - __visc__bindIn(var_73, 246, 4, 0); - __visc__bindIn(var_73, 247, 5, 0); - __visc__bindIn(var_73, 248, 6, 0); - __visc__bindIn(var_73, 249, 7, 0); - __visc__bindIn(var_73, 250, 8, 0); - __visc__bindIn(var_73, 251, 9, 0); - - void* var_74 = __visc__createNodeND(0, var_74_node); - - __visc__edge(var_73, var_74, 1, 0, 0, 0); - __visc__edge(var_73, var_74, 1, 1, 1, 0); - - void* var_75 = __visc__createNodeND(0, var_75_node); - - __visc__edge(var_74, var_75, 1, 0, 0, 0); - __visc__edge(var_74, var_75, 1, 1, 1, 0); - __visc__bindIn(var_75, 252, 2, 0); - __visc__bindIn(var_75, 253, 3, 0); - - void* var_76 = __visc__createNodeND(0, var_76_node); - - __visc__edge(var_75, var_76, 1, 0, 0, 0); - __visc__edge(var_75, var_76, 1, 1, 1, 0); - __visc__bindIn(var_76, 254, 2, 0); - __visc__bindIn(var_76, 255, 3, 0); - __visc__bindIn(var_76, 256, 4, 0); - __visc__bindIn(var_76, 257, 5, 0); - __visc__bindIn(var_76, 258, 6, 0); - __visc__bindIn(var_76, 259, 7, 0); - __visc__bindIn(var_76, 260, 8, 0); - __visc__bindIn(var_76, 261, 9, 0); - - void* var_77 = __visc__createNodeND(0, var_77_node); - - __visc__edge(var_76, var_77, 1, 0, 0, 0); - __visc__edge(var_76, var_77, 1, 1, 1, 0); - - void* var_78 = __visc__createNodeND(0, var_78_node); - - __visc__edge(var_77, var_78, 1, 0, 0, 0); - __visc__edge(var_77, var_78, 1, 1, 1, 0); - __visc__bindIn(var_78, 262, 2, 0); - __visc__bindIn(var_78, 263, 3, 0); - - void* var_79 = 
__visc__createNodeND(0, var_79_node); - - __visc__edge(var_78, var_79, 1, 0, 0, 0); - __visc__edge(var_78, var_79, 1, 1, 1, 0); - __visc__bindIn(var_79, 264, 2, 0); - __visc__bindIn(var_79, 265, 3, 0); - __visc__bindIn(var_79, 266, 4, 0); - __visc__bindIn(var_79, 267, 5, 0); - __visc__bindIn(var_79, 268, 6, 0); - __visc__bindIn(var_79, 269, 7, 0); - __visc__bindIn(var_79, 270, 8, 0); - __visc__bindIn(var_79, 271, 9, 0); - - void* var_80 = __visc__createNodeND(0, var_80_node); - - __visc__edge(var_79, var_80, 1, 0, 0, 0); - __visc__edge(var_79, var_80, 1, 1, 1, 0); - - void* var_81 = __visc__createNodeND(0, var_81_node); - - __visc__edge(var_80, var_81, 1, 0, 0, 0); - __visc__edge(var_80, var_81, 1, 1, 1, 0); - - void* var_82 = __visc__createNodeND(0, var_82_node); - - __visc__edge(var_81, var_82, 1, 0, 0, 0); - __visc__edge(var_81, var_82, 1, 1, 1, 0); - __visc__bindIn(var_82, 272, 2, 0); - __visc__bindIn(var_82, 273, 3, 0); - - void* var_83 = __visc__createNodeND(0, var_83_node); - - __visc__edge(var_82, var_83, 1, 0, 0, 0); - __visc__edge(var_82, var_83, 1, 1, 1, 0); - __visc__bindIn(var_83, 274, 2, 0); - __visc__bindIn(var_83, 275, 3, 0); - - void* var_84 = __visc__createNodeND(0, var_84_node); - - __visc__edge(var_83, var_84, 1, 0, 0, 0); - __visc__edge(var_83, var_84, 1, 1, 1, 0); - - __visc__bindOut(var_84, 0, 0, 0); - __visc__bindOut(var_84, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* batch_normalization_1_gamma; - size_t batch_normalization_1_gamma_bytes; - void* batch_normalization_1_beta; - size_t batch_normalization_1_beta_bytes; - void* batch_normalization_1_mean; - size_t batch_normalization_1_mean_bytes; - void* batch_normalization_1_variance; - size_t batch_normalization_1_variance_bytes; - void* depthwise_conv2d_1_w; - size_t depthwise_conv2d_1_w_bytes; - void* 
batch_normalization_2_gamma; - size_t batch_normalization_2_gamma_bytes; - void* batch_normalization_2_beta; - size_t batch_normalization_2_beta_bytes; - void* batch_normalization_2_mean; - size_t batch_normalization_2_mean_bytes; - void* batch_normalization_2_variance; - size_t batch_normalization_2_variance_bytes; - void* conv2d_2_w; - size_t conv2d_2_w_bytes; - void* batch_normalization_3_gamma; - size_t batch_normalization_3_gamma_bytes; - void* batch_normalization_3_beta; - size_t batch_normalization_3_beta_bytes; - void* batch_normalization_3_mean; - size_t batch_normalization_3_mean_bytes; - void* batch_normalization_3_variance; - size_t batch_normalization_3_variance_bytes; - void* depthwise_conv2d_2_w; - size_t depthwise_conv2d_2_w_bytes; - void* batch_normalization_4_gamma; - size_t batch_normalization_4_gamma_bytes; - void* batch_normalization_4_beta; - size_t batch_normalization_4_beta_bytes; - void* batch_normalization_4_mean; - size_t batch_normalization_4_mean_bytes; - void* batch_normalization_4_variance; - size_t batch_normalization_4_variance_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* batch_normalization_5_gamma; - size_t batch_normalization_5_gamma_bytes; - void* batch_normalization_5_beta; - size_t batch_normalization_5_beta_bytes; - void* batch_normalization_5_mean; - size_t batch_normalization_5_mean_bytes; - void* batch_normalization_5_variance; - size_t batch_normalization_5_variance_bytes; - void* depthwise_conv2d_3_w; - size_t depthwise_conv2d_3_w_bytes; - void* batch_normalization_6_gamma; - size_t batch_normalization_6_gamma_bytes; - void* batch_normalization_6_beta; - size_t batch_normalization_6_beta_bytes; - void* batch_normalization_6_mean; - size_t batch_normalization_6_mean_bytes; - void* batch_normalization_6_variance; - size_t batch_normalization_6_variance_bytes; - void* conv2d_4_w; - size_t conv2d_4_w_bytes; - void* batch_normalization_7_gamma; - size_t batch_normalization_7_gamma_bytes; - void* 
batch_normalization_7_beta; - size_t batch_normalization_7_beta_bytes; - void* batch_normalization_7_mean; - size_t batch_normalization_7_mean_bytes; - void* batch_normalization_7_variance; - size_t batch_normalization_7_variance_bytes; - void* depthwise_conv2d_4_w; - size_t depthwise_conv2d_4_w_bytes; - void* batch_normalization_8_gamma; - size_t batch_normalization_8_gamma_bytes; - void* batch_normalization_8_beta; - size_t batch_normalization_8_beta_bytes; - void* batch_normalization_8_mean; - size_t batch_normalization_8_mean_bytes; - void* batch_normalization_8_variance; - size_t batch_normalization_8_variance_bytes; - void* conv2d_5_w; - size_t conv2d_5_w_bytes; - void* batch_normalization_9_gamma; - size_t batch_normalization_9_gamma_bytes; - void* batch_normalization_9_beta; - size_t batch_normalization_9_beta_bytes; - void* batch_normalization_9_mean; - size_t batch_normalization_9_mean_bytes; - void* batch_normalization_9_variance; - size_t batch_normalization_9_variance_bytes; - void* depthwise_conv2d_5_w; - size_t depthwise_conv2d_5_w_bytes; - void* batch_normalization_10_gamma; - size_t batch_normalization_10_gamma_bytes; - void* batch_normalization_10_beta; - size_t batch_normalization_10_beta_bytes; - void* batch_normalization_10_mean; - size_t batch_normalization_10_mean_bytes; - void* batch_normalization_10_variance; - size_t batch_normalization_10_variance_bytes; - void* conv2d_6_w; - size_t conv2d_6_w_bytes; - void* batch_normalization_11_gamma; - size_t batch_normalization_11_gamma_bytes; - void* batch_normalization_11_beta; - size_t batch_normalization_11_beta_bytes; - void* batch_normalization_11_mean; - size_t batch_normalization_11_mean_bytes; - void* batch_normalization_11_variance; - size_t batch_normalization_11_variance_bytes; - void* depthwise_conv2d_6_w; - size_t depthwise_conv2d_6_w_bytes; - void* batch_normalization_12_gamma; - size_t batch_normalization_12_gamma_bytes; - void* batch_normalization_12_beta; - size_t 
batch_normalization_12_beta_bytes; - void* batch_normalization_12_mean; - size_t batch_normalization_12_mean_bytes; - void* batch_normalization_12_variance; - size_t batch_normalization_12_variance_bytes; - void* conv2d_7_w; - size_t conv2d_7_w_bytes; - void* batch_normalization_13_gamma; - size_t batch_normalization_13_gamma_bytes; - void* batch_normalization_13_beta; - size_t batch_normalization_13_beta_bytes; - void* batch_normalization_13_mean; - size_t batch_normalization_13_mean_bytes; - void* batch_normalization_13_variance; - size_t batch_normalization_13_variance_bytes; - void* depthwise_conv2d_7_w; - size_t depthwise_conv2d_7_w_bytes; - void* batch_normalization_14_gamma; - size_t batch_normalization_14_gamma_bytes; - void* batch_normalization_14_beta; - size_t batch_normalization_14_beta_bytes; - void* batch_normalization_14_mean; - size_t batch_normalization_14_mean_bytes; - void* batch_normalization_14_variance; - size_t batch_normalization_14_variance_bytes; - void* conv2d_8_w; - size_t conv2d_8_w_bytes; - void* batch_normalization_15_gamma; - size_t batch_normalization_15_gamma_bytes; - void* batch_normalization_15_beta; - size_t batch_normalization_15_beta_bytes; - void* batch_normalization_15_mean; - size_t batch_normalization_15_mean_bytes; - void* batch_normalization_15_variance; - size_t batch_normalization_15_variance_bytes; - void* depthwise_conv2d_8_w; - size_t depthwise_conv2d_8_w_bytes; - void* batch_normalization_16_gamma; - size_t batch_normalization_16_gamma_bytes; - void* batch_normalization_16_beta; - size_t batch_normalization_16_beta_bytes; - void* batch_normalization_16_mean; - size_t batch_normalization_16_mean_bytes; - void* batch_normalization_16_variance; - size_t batch_normalization_16_variance_bytes; - void* conv2d_9_w; - size_t conv2d_9_w_bytes; - void* batch_normalization_17_gamma; - size_t batch_normalization_17_gamma_bytes; - void* batch_normalization_17_beta; - size_t batch_normalization_17_beta_bytes; - void* 
batch_normalization_17_mean; - size_t batch_normalization_17_mean_bytes; - void* batch_normalization_17_variance; - size_t batch_normalization_17_variance_bytes; - void* depthwise_conv2d_9_w; - size_t depthwise_conv2d_9_w_bytes; - void* batch_normalization_18_gamma; - size_t batch_normalization_18_gamma_bytes; - void* batch_normalization_18_beta; - size_t batch_normalization_18_beta_bytes; - void* batch_normalization_18_mean; - size_t batch_normalization_18_mean_bytes; - void* batch_normalization_18_variance; - size_t batch_normalization_18_variance_bytes; - void* conv2d_10_w; - size_t conv2d_10_w_bytes; - void* batch_normalization_19_gamma; - size_t batch_normalization_19_gamma_bytes; - void* batch_normalization_19_beta; - size_t batch_normalization_19_beta_bytes; - void* batch_normalization_19_mean; - size_t batch_normalization_19_mean_bytes; - void* batch_normalization_19_variance; - size_t batch_normalization_19_variance_bytes; - void* depthwise_conv2d_10_w; - size_t depthwise_conv2d_10_w_bytes; - void* batch_normalization_20_gamma; - size_t batch_normalization_20_gamma_bytes; - void* batch_normalization_20_beta; - size_t batch_normalization_20_beta_bytes; - void* batch_normalization_20_mean; - size_t batch_normalization_20_mean_bytes; - void* batch_normalization_20_variance; - size_t batch_normalization_20_variance_bytes; - void* conv2d_11_w; - size_t conv2d_11_w_bytes; - void* batch_normalization_21_gamma; - size_t batch_normalization_21_gamma_bytes; - void* batch_normalization_21_beta; - size_t batch_normalization_21_beta_bytes; - void* batch_normalization_21_mean; - size_t batch_normalization_21_mean_bytes; - void* batch_normalization_21_variance; - size_t batch_normalization_21_variance_bytes; - void* depthwise_conv2d_11_w; - size_t depthwise_conv2d_11_w_bytes; - void* batch_normalization_22_gamma; - size_t batch_normalization_22_gamma_bytes; - void* batch_normalization_22_beta; - size_t batch_normalization_22_beta_bytes; - void* 
batch_normalization_22_mean; - size_t batch_normalization_22_mean_bytes; - void* batch_normalization_22_variance; - size_t batch_normalization_22_variance_bytes; - void* conv2d_12_w; - size_t conv2d_12_w_bytes; - void* batch_normalization_23_gamma; - size_t batch_normalization_23_gamma_bytes; - void* batch_normalization_23_beta; - size_t batch_normalization_23_beta_bytes; - void* batch_normalization_23_mean; - size_t batch_normalization_23_mean_bytes; - void* batch_normalization_23_variance; - size_t batch_normalization_23_variance_bytes; - void* depthwise_conv2d_12_w; - size_t depthwise_conv2d_12_w_bytes; - void* batch_normalization_24_gamma; - size_t batch_normalization_24_gamma_bytes; - void* batch_normalization_24_beta; - size_t batch_normalization_24_beta_bytes; - void* batch_normalization_24_mean; - size_t batch_normalization_24_mean_bytes; - void* batch_normalization_24_variance; - size_t batch_normalization_24_variance_bytes; - void* conv2d_13_w; - size_t conv2d_13_w_bytes; - void* batch_normalization_25_gamma; - size_t batch_normalization_25_gamma_bytes; - void* batch_normalization_25_beta; - size_t batch_normalization_25_beta_bytes; - void* batch_normalization_25_mean; - size_t batch_normalization_25_mean_bytes; - void* batch_normalization_25_variance; - size_t batch_normalization_25_variance_bytes; - void* depthwise_conv2d_13_w; - size_t depthwise_conv2d_13_w_bytes; - void* batch_normalization_26_gamma; - size_t batch_normalization_26_gamma_bytes; - void* batch_normalization_26_beta; - size_t batch_normalization_26_beta_bytes; - void* batch_normalization_26_mean; - size_t batch_normalization_26_mean_bytes; - void* batch_normalization_26_variance; - size_t batch_normalization_26_variance_bytes; - void* conv2d_14_w; - size_t conv2d_14_w_bytes; - void* batch_normalization_27_gamma; - size_t batch_normalization_27_gamma_bytes; - void* batch_normalization_27_beta; - size_t batch_normalization_27_beta_bytes; - void* batch_normalization_27_mean; - size_t 
batch_normalization_27_mean_bytes; - void* batch_normalization_27_variance; - size_t batch_normalization_27_variance_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - -std::string dir_prefix = std::string("data/mobilenet_quant/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + 
std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = 
readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string 
batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + 
std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = 
readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + 
std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); -void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); -void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); -void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); -void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); -void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); -void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); -void* 
batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); -void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); -void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); -void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); -void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); -void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); -void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); -void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); -void* batch_normalization_17_gamma = 
readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); -void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); -void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); -void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); -void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); -void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); -void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); -void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); -void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 
0,512,512,1,1); -std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); -void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); -void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); -void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); -void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); -void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); -void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); -void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); -void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); -void* batch_normalization_20_variance = 
readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); -void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); -void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); -void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); -void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); -void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); -void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); -void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); -void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 
0,1,512,1,1); -std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); -void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); -void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); -void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); -void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); -void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); -void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); -void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); -void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); -std::string 
batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); -void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); -void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); -std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); -void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); -void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); -void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); -void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); -std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); -void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); -std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); -void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_beta_path = dir_prefix + 
std::string("batch_normalization_26_beta.bin"); -void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); -void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); -void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); -std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); -void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); -void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); -void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); -void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); -void* 
input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -uint8_t* labels = readLabels(labels_path.c_str(),10000); - -__visc__init(); -RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - -args->input = input; -args->input_bytes = 0; -args->conv2d_1_w = conv2d_1_w; -args->conv2d_1_w_bytes = 0; -args->batch_normalization_1_gamma = batch_normalization_1_gamma; -args->batch_normalization_1_gamma_bytes = 0; -args->batch_normalization_1_beta = batch_normalization_1_beta; -args->batch_normalization_1_beta_bytes = 0; -args->batch_normalization_1_mean = batch_normalization_1_mean; -args->batch_normalization_1_mean_bytes = 0; -args->batch_normalization_1_variance = batch_normalization_1_variance; -args->batch_normalization_1_variance_bytes = 0; -args->depthwise_conv2d_1_w = depthwise_conv2d_1_w; -args->depthwise_conv2d_1_w_bytes = 0; -args->batch_normalization_2_gamma = batch_normalization_2_gamma; -args->batch_normalization_2_gamma_bytes = 0; -args->batch_normalization_2_beta = batch_normalization_2_beta; -args->batch_normalization_2_beta_bytes = 0; -args->batch_normalization_2_mean = batch_normalization_2_mean; -args->batch_normalization_2_mean_bytes = 0; -args->batch_normalization_2_variance = batch_normalization_2_variance; -args->batch_normalization_2_variance_bytes = 0; -args->conv2d_2_w = conv2d_2_w; -args->conv2d_2_w_bytes = 0; -args->batch_normalization_3_gamma = batch_normalization_3_gamma; -args->batch_normalization_3_gamma_bytes = 0; -args->batch_normalization_3_beta = batch_normalization_3_beta; -args->batch_normalization_3_beta_bytes = 0; -args->batch_normalization_3_mean = batch_normalization_3_mean; -args->batch_normalization_3_mean_bytes = 0; -args->batch_normalization_3_variance = batch_normalization_3_variance; -args->batch_normalization_3_variance_bytes = 0; -args->depthwise_conv2d_2_w = depthwise_conv2d_2_w; -args->depthwise_conv2d_2_w_bytes = 0; -args->batch_normalization_4_gamma = batch_normalization_4_gamma; 
-args->batch_normalization_4_gamma_bytes = 0; -args->batch_normalization_4_beta = batch_normalization_4_beta; -args->batch_normalization_4_beta_bytes = 0; -args->batch_normalization_4_mean = batch_normalization_4_mean; -args->batch_normalization_4_mean_bytes = 0; -args->batch_normalization_4_variance = batch_normalization_4_variance; -args->batch_normalization_4_variance_bytes = 0; -args->conv2d_3_w = conv2d_3_w; -args->conv2d_3_w_bytes = 0; -args->batch_normalization_5_gamma = batch_normalization_5_gamma; -args->batch_normalization_5_gamma_bytes = 0; -args->batch_normalization_5_beta = batch_normalization_5_beta; -args->batch_normalization_5_beta_bytes = 0; -args->batch_normalization_5_mean = batch_normalization_5_mean; -args->batch_normalization_5_mean_bytes = 0; -args->batch_normalization_5_variance = batch_normalization_5_variance; -args->batch_normalization_5_variance_bytes = 0; -args->depthwise_conv2d_3_w = depthwise_conv2d_3_w; -args->depthwise_conv2d_3_w_bytes = 0; -args->batch_normalization_6_gamma = batch_normalization_6_gamma; -args->batch_normalization_6_gamma_bytes = 0; -args->batch_normalization_6_beta = batch_normalization_6_beta; -args->batch_normalization_6_beta_bytes = 0; -args->batch_normalization_6_mean = batch_normalization_6_mean; -args->batch_normalization_6_mean_bytes = 0; -args->batch_normalization_6_variance = batch_normalization_6_variance; -args->batch_normalization_6_variance_bytes = 0; -args->conv2d_4_w = conv2d_4_w; -args->conv2d_4_w_bytes = 0; -args->batch_normalization_7_gamma = batch_normalization_7_gamma; -args->batch_normalization_7_gamma_bytes = 0; -args->batch_normalization_7_beta = batch_normalization_7_beta; -args->batch_normalization_7_beta_bytes = 0; -args->batch_normalization_7_mean = batch_normalization_7_mean; -args->batch_normalization_7_mean_bytes = 0; -args->batch_normalization_7_variance = batch_normalization_7_variance; -args->batch_normalization_7_variance_bytes = 0; -args->depthwise_conv2d_4_w = 
depthwise_conv2d_4_w; -args->depthwise_conv2d_4_w_bytes = 0; -args->batch_normalization_8_gamma = batch_normalization_8_gamma; -args->batch_normalization_8_gamma_bytes = 0; -args->batch_normalization_8_beta = batch_normalization_8_beta; -args->batch_normalization_8_beta_bytes = 0; -args->batch_normalization_8_mean = batch_normalization_8_mean; -args->batch_normalization_8_mean_bytes = 0; -args->batch_normalization_8_variance = batch_normalization_8_variance; -args->batch_normalization_8_variance_bytes = 0; -args->conv2d_5_w = conv2d_5_w; -args->conv2d_5_w_bytes = 0; -args->batch_normalization_9_gamma = batch_normalization_9_gamma; -args->batch_normalization_9_gamma_bytes = 0; -args->batch_normalization_9_beta = batch_normalization_9_beta; -args->batch_normalization_9_beta_bytes = 0; -args->batch_normalization_9_mean = batch_normalization_9_mean; -args->batch_normalization_9_mean_bytes = 0; -args->batch_normalization_9_variance = batch_normalization_9_variance; -args->batch_normalization_9_variance_bytes = 0; -args->depthwise_conv2d_5_w = depthwise_conv2d_5_w; -args->depthwise_conv2d_5_w_bytes = 0; -args->batch_normalization_10_gamma = batch_normalization_10_gamma; -args->batch_normalization_10_gamma_bytes = 0; -args->batch_normalization_10_beta = batch_normalization_10_beta; -args->batch_normalization_10_beta_bytes = 0; -args->batch_normalization_10_mean = batch_normalization_10_mean; -args->batch_normalization_10_mean_bytes = 0; -args->batch_normalization_10_variance = batch_normalization_10_variance; -args->batch_normalization_10_variance_bytes = 0; -args->conv2d_6_w = conv2d_6_w; -args->conv2d_6_w_bytes = 0; -args->batch_normalization_11_gamma = batch_normalization_11_gamma; -args->batch_normalization_11_gamma_bytes = 0; -args->batch_normalization_11_beta = batch_normalization_11_beta; -args->batch_normalization_11_beta_bytes = 0; -args->batch_normalization_11_mean = batch_normalization_11_mean; -args->batch_normalization_11_mean_bytes = 0; 
-args->batch_normalization_11_variance = batch_normalization_11_variance; -args->batch_normalization_11_variance_bytes = 0; -args->depthwise_conv2d_6_w = depthwise_conv2d_6_w; -args->depthwise_conv2d_6_w_bytes = 0; -args->batch_normalization_12_gamma = batch_normalization_12_gamma; -args->batch_normalization_12_gamma_bytes = 0; -args->batch_normalization_12_beta = batch_normalization_12_beta; -args->batch_normalization_12_beta_bytes = 0; -args->batch_normalization_12_mean = batch_normalization_12_mean; -args->batch_normalization_12_mean_bytes = 0; -args->batch_normalization_12_variance = batch_normalization_12_variance; -args->batch_normalization_12_variance_bytes = 0; -args->conv2d_7_w = conv2d_7_w; -args->conv2d_7_w_bytes = 0; -args->batch_normalization_13_gamma = batch_normalization_13_gamma; -args->batch_normalization_13_gamma_bytes = 0; -args->batch_normalization_13_beta = batch_normalization_13_beta; -args->batch_normalization_13_beta_bytes = 0; -args->batch_normalization_13_mean = batch_normalization_13_mean; -args->batch_normalization_13_mean_bytes = 0; -args->batch_normalization_13_variance = batch_normalization_13_variance; -args->batch_normalization_13_variance_bytes = 0; -args->depthwise_conv2d_7_w = depthwise_conv2d_7_w; -args->depthwise_conv2d_7_w_bytes = 0; -args->batch_normalization_14_gamma = batch_normalization_14_gamma; -args->batch_normalization_14_gamma_bytes = 0; -args->batch_normalization_14_beta = batch_normalization_14_beta; -args->batch_normalization_14_beta_bytes = 0; -args->batch_normalization_14_mean = batch_normalization_14_mean; -args->batch_normalization_14_mean_bytes = 0; -args->batch_normalization_14_variance = batch_normalization_14_variance; -args->batch_normalization_14_variance_bytes = 0; -args->conv2d_8_w = conv2d_8_w; -args->conv2d_8_w_bytes = 0; -args->batch_normalization_15_gamma = batch_normalization_15_gamma; -args->batch_normalization_15_gamma_bytes = 0; -args->batch_normalization_15_beta = batch_normalization_15_beta; 
-args->batch_normalization_15_beta_bytes = 0; -args->batch_normalization_15_mean = batch_normalization_15_mean; -args->batch_normalization_15_mean_bytes = 0; -args->batch_normalization_15_variance = batch_normalization_15_variance; -args->batch_normalization_15_variance_bytes = 0; -args->depthwise_conv2d_8_w = depthwise_conv2d_8_w; -args->depthwise_conv2d_8_w_bytes = 0; -args->batch_normalization_16_gamma = batch_normalization_16_gamma; -args->batch_normalization_16_gamma_bytes = 0; -args->batch_normalization_16_beta = batch_normalization_16_beta; -args->batch_normalization_16_beta_bytes = 0; -args->batch_normalization_16_mean = batch_normalization_16_mean; -args->batch_normalization_16_mean_bytes = 0; -args->batch_normalization_16_variance = batch_normalization_16_variance; -args->batch_normalization_16_variance_bytes = 0; -args->conv2d_9_w = conv2d_9_w; -args->conv2d_9_w_bytes = 0; -args->batch_normalization_17_gamma = batch_normalization_17_gamma; -args->batch_normalization_17_gamma_bytes = 0; -args->batch_normalization_17_beta = batch_normalization_17_beta; -args->batch_normalization_17_beta_bytes = 0; -args->batch_normalization_17_mean = batch_normalization_17_mean; -args->batch_normalization_17_mean_bytes = 0; -args->batch_normalization_17_variance = batch_normalization_17_variance; -args->batch_normalization_17_variance_bytes = 0; -args->depthwise_conv2d_9_w = depthwise_conv2d_9_w; -args->depthwise_conv2d_9_w_bytes = 0; -args->batch_normalization_18_gamma = batch_normalization_18_gamma; -args->batch_normalization_18_gamma_bytes = 0; -args->batch_normalization_18_beta = batch_normalization_18_beta; -args->batch_normalization_18_beta_bytes = 0; -args->batch_normalization_18_mean = batch_normalization_18_mean; -args->batch_normalization_18_mean_bytes = 0; -args->batch_normalization_18_variance = batch_normalization_18_variance; -args->batch_normalization_18_variance_bytes = 0; -args->conv2d_10_w = conv2d_10_w; -args->conv2d_10_w_bytes = 0; 
-args->batch_normalization_19_gamma = batch_normalization_19_gamma; -args->batch_normalization_19_gamma_bytes = 0; -args->batch_normalization_19_beta = batch_normalization_19_beta; -args->batch_normalization_19_beta_bytes = 0; -args->batch_normalization_19_mean = batch_normalization_19_mean; -args->batch_normalization_19_mean_bytes = 0; -args->batch_normalization_19_variance = batch_normalization_19_variance; -args->batch_normalization_19_variance_bytes = 0; -args->depthwise_conv2d_10_w = depthwise_conv2d_10_w; -args->depthwise_conv2d_10_w_bytes = 0; -args->batch_normalization_20_gamma = batch_normalization_20_gamma; -args->batch_normalization_20_gamma_bytes = 0; -args->batch_normalization_20_beta = batch_normalization_20_beta; -args->batch_normalization_20_beta_bytes = 0; -args->batch_normalization_20_mean = batch_normalization_20_mean; -args->batch_normalization_20_mean_bytes = 0; -args->batch_normalization_20_variance = batch_normalization_20_variance; -args->batch_normalization_20_variance_bytes = 0; -args->conv2d_11_w = conv2d_11_w; -args->conv2d_11_w_bytes = 0; -args->batch_normalization_21_gamma = batch_normalization_21_gamma; -args->batch_normalization_21_gamma_bytes = 0; -args->batch_normalization_21_beta = batch_normalization_21_beta; -args->batch_normalization_21_beta_bytes = 0; -args->batch_normalization_21_mean = batch_normalization_21_mean; -args->batch_normalization_21_mean_bytes = 0; -args->batch_normalization_21_variance = batch_normalization_21_variance; -args->batch_normalization_21_variance_bytes = 0; -args->depthwise_conv2d_11_w = depthwise_conv2d_11_w; -args->depthwise_conv2d_11_w_bytes = 0; -args->batch_normalization_22_gamma = batch_normalization_22_gamma; -args->batch_normalization_22_gamma_bytes = 0; -args->batch_normalization_22_beta = batch_normalization_22_beta; -args->batch_normalization_22_beta_bytes = 0; -args->batch_normalization_22_mean = batch_normalization_22_mean; -args->batch_normalization_22_mean_bytes = 0; 
-args->batch_normalization_22_variance = batch_normalization_22_variance; -args->batch_normalization_22_variance_bytes = 0; -args->conv2d_12_w = conv2d_12_w; -args->conv2d_12_w_bytes = 0; -args->batch_normalization_23_gamma = batch_normalization_23_gamma; -args->batch_normalization_23_gamma_bytes = 0; -args->batch_normalization_23_beta = batch_normalization_23_beta; -args->batch_normalization_23_beta_bytes = 0; -args->batch_normalization_23_mean = batch_normalization_23_mean; -args->batch_normalization_23_mean_bytes = 0; -args->batch_normalization_23_variance = batch_normalization_23_variance; -args->batch_normalization_23_variance_bytes = 0; -args->depthwise_conv2d_12_w = depthwise_conv2d_12_w; -args->depthwise_conv2d_12_w_bytes = 0; -args->batch_normalization_24_gamma = batch_normalization_24_gamma; -args->batch_normalization_24_gamma_bytes = 0; -args->batch_normalization_24_beta = batch_normalization_24_beta; -args->batch_normalization_24_beta_bytes = 0; -args->batch_normalization_24_mean = batch_normalization_24_mean; -args->batch_normalization_24_mean_bytes = 0; -args->batch_normalization_24_variance = batch_normalization_24_variance; -args->batch_normalization_24_variance_bytes = 0; -args->conv2d_13_w = conv2d_13_w; -args->conv2d_13_w_bytes = 0; -args->batch_normalization_25_gamma = batch_normalization_25_gamma; -args->batch_normalization_25_gamma_bytes = 0; -args->batch_normalization_25_beta = batch_normalization_25_beta; -args->batch_normalization_25_beta_bytes = 0; -args->batch_normalization_25_mean = batch_normalization_25_mean; -args->batch_normalization_25_mean_bytes = 0; -args->batch_normalization_25_variance = batch_normalization_25_variance; -args->batch_normalization_25_variance_bytes = 0; -args->depthwise_conv2d_13_w = depthwise_conv2d_13_w; -args->depthwise_conv2d_13_w_bytes = 0; -args->batch_normalization_26_gamma = batch_normalization_26_gamma; -args->batch_normalization_26_gamma_bytes = 0; -args->batch_normalization_26_beta = 
batch_normalization_26_beta; -args->batch_normalization_26_beta_bytes = 0; -args->batch_normalization_26_mean = batch_normalization_26_mean; -args->batch_normalization_26_mean_bytes = 0; -args->batch_normalization_26_variance = batch_normalization_26_variance; -args->batch_normalization_26_variance_bytes = 0; -args->conv2d_14_w = conv2d_14_w; -args->conv2d_14_w_bytes = 0; -args->batch_normalization_27_gamma = batch_normalization_27_gamma; -args->batch_normalization_27_gamma_bytes = 0; -args->batch_normalization_27_beta = batch_normalization_27_beta; -args->batch_normalization_27_beta_bytes = 0; -args->batch_normalization_27_mean = batch_normalization_27_mean; -args->batch_normalization_27_mean_bytes = 0; -args->batch_normalization_27_variance = batch_normalization_27_variance; -args->batch_normalization_27_variance_bytes = 0; -args->dense_1_w = dense_1_w; -args->dense_1_w_bytes = 0; -args->dense_1_b = dense_1_b; -args->dense_1_b_bytes = 0; - -void* dfg = __visc__launch(0, root, (void*) args); - -__visc__wait(dfg); - -void *result = static_cast<RootIn*>(args)->input; -hpvm_request_tensor(result, 0); - -__visc__cleanup(); - computeAccuracy2(labels, 10000, result); -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_beta.bin deleted file mode 100644 index bb1eb07a8e262d2f4d941578fd4c19d6a90c7562..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_gamma.bin deleted file mode 100644 index 931c8925b89f363a41d3cf81483bde60abafba61..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_mean.bin deleted file mode 100644 index 633bdc9fd4a9ef052ca8b6ab488a156002e3d4b5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_variance.bin deleted file mode 100644 index f92c73f59eb5eb35ca94e3ce006e5f3c4f60ecef..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_10_variance.bin +++ /dev/null @@ -1,2 +0,0 @@ -njÏ>å‰>(è(>]÷>AuH>ôÖ.>«ïe>c*W>ÕäY>5T3>hsI>H>ó>õæ>2þS>íj[>¶†‰>]€>at>;t=>{¾D> £¢>JcÊ>.ý`>·&>«–>[À¢>³#Þ=W¦ä>µµ>‰Aª>f‚>Vw>T@…>\’%>"i+>¹d>qB?ÅÚ8>”>~>A>ÂŽ;>u8G>*¨3>žj>P Ê= w>Knd>V>®ã8>ÖÃu>½Ê>¤Â?>®Ãà>P2>o·=d>ëFI>û¶Ž>‚º&>'A‡>¾úù>Ó>l±8>@MD>>ص>Ýfü=P>y;g>-ú³>–üC>¾s>Æ8>Ä=y>…>hŸ> -T>/Ð{>ü€Ù>hæª>gw>î}>ŸC¢>à9>D6>Ð1->å2B>~.>·Ì">05>'þD>Ï…€>²Ò3>¸”y>×Âþ=c>–O°>pëÏ>OVá=Òó‹> û=CG'>çO_>š&>‚íw>ã’>%>»$>ÇG4>Ó¸>LP>ñ6P>n>«£$>ï’Œ>…»€>ôÅŠ>þœ>±v§>W.q>îÏ>:ëh>EäP>o¢F>Îg >DK5>êÒ>ÿ!>_FQ>q‚&>ÖwŸ>]e<>ߤ>Ô²j>³D>`PC>ýA$>&1>Îc·>TÏ7>%>™Ëy>Uð¨>QÏ>·*9>ÑÙ=UÊŽ>ä >ªÒJ>ÁÙ=ä">Þ¶>ç>Å\a>èk1># >‹p)>/ ¤> H]>ÖV>·ª>`¯>œ'T>¼3‡>²]©>6¢>¥‹·>Xã¥>óŸ=°’Ð=c¥Â>öÀ8>sQ1>¿{°>}§>>“>f…>#äá=Çi>…Â>œ®m>¡l«>£v> =d>ØOX>T>Ìõ;>z®>•±V>ÆÝ^>K>#žç=œ9S>~F\>üg|>‰É>O»®>©"p>[ÆN>[YD>›£ý=!U>&3>Ó´>\û>v>¶,ƒ> Žb>•=e>¹Ò>S8…>Ä03>æÇZ>³Y>†2>YÄ`>C¯r>áÍ“>îç>¼Xð=ï‚F>‚Bk>Þ?">SÅ>ÙLž> øP>ôgÚ>HÔ¤>y/>î$>˜MÄ> –•>.‚ô=©u°=å6>5@>y>t÷à=RŒ%>ø¬> >2Òj>dO%>Õ˪>&|O>EVn>Ÿ¨>íË7>l:>HzŸ>³G’>!y¹>µÈF> a(> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_beta.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_beta.bin deleted file mode 100644 index 5918477d3638e851c3fdfc47dc550cea3afa7d50..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_gamma.bin deleted file mode 100644 index 6b3d705199383135bed811a6fdaa237d754487bd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_mean.bin deleted file mode 100644 index 965edb6440d48ce4b9abc68cd3b9eb1d3f9cf3da..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_variance.bin deleted file mode 100644 index a7a4b16bd7e581a4fdf1819ec0484559febd1fca..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_11_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_beta.bin deleted file mode 100644 index 8ade4cf080d7d3228e752d284ed500ba6300d261..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_beta.bin and /dev/null 
differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_gamma.bin deleted file mode 100644 index 6dfb7c3833821b29f9230df806c4abc0c16a7b59..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_gamma.bin +++ /dev/null @@ -1,5 +0,0 @@ - «S?#ªI?ùàn? :g?o%P?ßR†?OŸP?û<?á6?†W?ÅM?ߣH?B»d? i_?Ã{W?ÅßC? -X?.c¥?ä=^?k÷>?Ÿ%{?\þM?uI?øM?çIž?9Qa?éØK?¢AR?K^^?¦9?À#œ?‰@˜?ÏõD?T?‡sh?ÒÀX?HèM?Ì_?J%X?¿U?ñEn?”O?nóŸ?ýH?-±¡?j%r?Ñë]?’;?VÞW?m f?™Z??á™Y?@T?ÃxU?/M?÷ŠO?j6Ž?‚™[?ÿÜn?r´D?{ÒJ?xÌV?°ïY?R€?®lH?ÎÐK?m°T?Å m?¥Û@?P›L?ìÕ˜?'î?ò˜Y?Ä1?&±?$L?1¡¬?— G?ÚIw?˜ñ?z4? |K?ñN?,™@?a¦H? dZ?ÈóY?s´N?Ÿ)ˆ?°yd?³ù£?†\?<èX?ŽåO?¬N?²ÚO?™?4aQ?Xy? -)Ÿ?›^?7uF?(X? hš?³?A3u?¸-“?«7P?=×a?œ‹C?ßøˆ? qq?$ÚP?àß?Šì¨?ö^?%œp?kO?”Q?Šd?_G?ˆ??ïÞ@?½œk?<öV?¬<R?°>?.jO?„Œ?2¬Q?¥ûª?µÊY?ÓÙD?L—f?EU?c²6?O©®?Z(H?‰Š?KX?p¦T?‚Jm?…;?ÇŸŠ?¶€?ým?Øp?¨@?~Ó^?;öC?/€[?ÃÑ©?zÅ‹?1éH?ìT>?p b?q9^?| K? -ÞS?ÑE?[ô[?;ï’?/0?¬`?°µZ?FuX?o@c?eÑn?ÛvL?>š?êo”?ïèg? -·q?2èF?AI™?j™=?7ÖZ?üó_?$ÆH?¹È_?øaW?jªƒ?ÅwT?^Ïm?•P?tZm?ns??°r^?]»U?@bk?†Ž?fŸ?ð'†?ñ´‘?ߤ?#Ñž?V'¢?[hM?fY]? ¤?’?q¼a?Ýþ[?g}¡?IJH?Š3”?ç\F?þ]?ÂS?w2D?—!V?¹IV?èõL?¡Œ_?øâ™?ÖxH?‘ùX?éH?+K8?†áV?ûR?–ß“?¶EI?˜'Z?Ñì^?¶~@?ö:–?ª¼_? ²ž?ÒÐ]?C¼·?!V?ÕK?Âc‘?9i”?XY?$¸D??Å©?V#E?>Ž`?!Z?Éñ˜?c¯U?Öæš?v9?jfX?ÌvŸ?3aV?ÍðR?qt‘?ü ?ôýE?b{B?(ß7?{I\?& K? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_mean.bin deleted file mode 100644 index 8899c2ad8395a98c752b1777095018cc90ca693b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_variance.bin deleted file mode 100644 index 9206092b5ee7fa6178bb9109a9aabd5dbfaa7ccf..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_12_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_beta.bin deleted file mode 100644 index 0f5fe8656435b28ec4b928af599b0a63915a651a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_gamma.bin deleted file mode 100644 index c79d7d0b02b65ea9953bfd1fa164773f96e5ade0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_mean.bin deleted file mode 100644 index 
2a6d471779cb2634718545d33827ca1d8d023c07..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_variance.bin deleted file mode 100644 index 5a2e2c8ca3645c6115b341b71141029d25064f18..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_13_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_beta.bin deleted file mode 100644 index 79948d2a5e40f633e6675c9c8c98f186a3ae2626..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_gamma.bin deleted file mode 100644 index 2f9a59ae913b2fcf4ef44018e295a055ea357d45..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_mean.bin deleted file mode 100644 index 278d39b1a67c00a4015d2687ab936ddd4cbc6e34..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_variance.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_variance.bin deleted file mode 100644 index 4749c1a52d14caccf7df518ad56f2c03901dcf1a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_14_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_beta.bin deleted file mode 100644 index 27f1a01dee6e2c9631ef312015fca880f8aa7b99..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_gamma.bin deleted file mode 100644 index 0fe3148783c75679668beae35231fa2eb0308a8a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_mean.bin deleted file mode 100644 index 9701d55c3d49a2d4ee43a45dad07886d62591653..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_variance.bin deleted file mode 100644 index f679da9df83af326cc3d886528c298157ffbb561..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_15_variance.bin and 
/dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_beta.bin deleted file mode 100644 index c2802a0da57a45a0839b9896a3dd0a9a70b8e669..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_gamma.bin deleted file mode 100644 index f94cebe4a7af3a4c840c2f8b9bbb9a1ee7cb5b29..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_mean.bin deleted file mode 100644 index a6d415f6dfd476fe1fd620794230c6d289158f50..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_variance.bin deleted file mode 100644 index efa5fcfd7916e86848227806134efd7b4ec1e55e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_16_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_beta.bin deleted file mode 100644 index 41201773cfd82292ab63ade568191ed261648538..0000000000000000000000000000000000000000 
Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_gamma.bin deleted file mode 100644 index 87613f6bc687bd539da0dd3fbda58e19a3e4071c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_mean.bin deleted file mode 100644 index dee72d911fc96d785150d99101faac2905c61bb8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_variance.bin deleted file mode 100644 index 86732c56ca1d6fa38ed0ccd379a26a7756816f7b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_17_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_beta.bin deleted file mode 100644 index c520fdc378129c16c3c7ab8772faea68e00fd4f7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_gamma.bin deleted 
file mode 100644 index 1aec3276306988ccd80ab907faba7538170d6e0e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_mean.bin deleted file mode 100644 index cf9f6a04871515eae7a1aee7c9d103ca13bc8aae..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_variance.bin deleted file mode 100644 index 7b46f134cd68995d45a2baab62188fd775e4ae82..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_18_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_beta.bin deleted file mode 100644 index a4a7d99bc7b4c8f1a0d5dbdc4385036d01586d33..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_gamma.bin deleted file mode 100644 index 60ea687e491464d474868e42dfc21ce1cd67961d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_gamma.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_mean.bin deleted file mode 100644 index 2d9c9ef86608e1af225cd46ddd07d3a2bb9d5853..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_variance.bin deleted file mode 100644 index f4e2ef2b5ae595944b6d2a4191594a2029508b1b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_19_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_beta.bin deleted file mode 100644 index d6a711c22f8e5e9b9df5fe17fec24e12d35c20cc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_beta.bin +++ /dev/null @@ -1,2 +0,0 @@ -ˆVT½ý¿P¾…~¬=Œ¶‚>v6R=ÑR§¾ P£¾Öw‚¾þv>˜é ?qlk½!’?·cÜ>£ -¯¾)šs¾(ì>!<(?Œë>o÷½¹”=6X¾êjA¼eê½&\Ü>Å—I¾ÔÞP¾].^=(ÿ¤>ã g?‹r?Tõ> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_gamma.bin deleted file mode 100644 index 9565d3b2a5ed07f2017c79534d689a729160ca46..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_gamma.bin +++ /dev/null @@ -1 +0,0 @@ -9‚Œ?Ýåf?íDƒ?œ[€?ú†Ž?.8€?Z!„?L;|?èƒ?‹ƒ?ŽÃt?.ƒ??î2q?6É?!?o?©¢]?Žmx?ýXƒ?§‚??9?Mº„?éÆr?f?~?>Ò~?JŒ?sh€?‰j,?üt}?Vt? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_mean.bin deleted file mode 100644 index f552c5162cd4d3d2ed8c0edf098c2a9adbb403fd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_variance.bin deleted file mode 100644 index 715fe55fd43af30b967ade11301595dd051a7770..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_1_variance.bin +++ /dev/null @@ -1 +0,0 @@ -?P¬A%l4AaAF@B|ÝAKö@È…DBB„>‘AŸó¯A‡¥@AA÷B"ôÍ?jþ@ÕŒ‘BQ-µA‹…BZBé?ö)¦D¼]øB8]MA•,AÐå;@àù€@Ê·þB¥žA¨¯ŽB²[®@¼ó^A5¬?•ÃÂ@¤œ@ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_beta.bin deleted file mode 100644 index 5291d00818ecc56eb039c71ed86d1a8e7e0f03a5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_gamma.bin deleted file mode 100644 index 0ac1e2c1fa63ce2deb08f1b7a5aacd925749385b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_mean.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_mean.bin deleted file mode 100644 index f183a0ee683d40cc26247a32963e6321f85e7688..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_variance.bin deleted file mode 100644 index 1d9fac8cdd2e32c1e821deaef3ad2a6bcd4cbdb9..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_20_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_beta.bin deleted file mode 100644 index 393f76218be9548b415c5b1a43a3c63a302b7300..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_gamma.bin deleted file mode 100644 index 8b84922da7063fb41b68d983475c4c9bf91a2ac1..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_mean.bin deleted file mode 100644 index 78f070dc6515294f189e0b71692e4f61981608fc..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_mean.bin and /dev/null 
differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_variance.bin deleted file mode 100644 index e2e11c338fb2ea2a00d3aae3798ca3a2fdb82a1b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_21_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_beta.bin deleted file mode 100644 index bf38673377e42584d82b848299c7bfb531655de5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_gamma.bin deleted file mode 100644 index fd397b675a9a5da3fc1174a2f56f84ef3d67a8e8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_mean.bin deleted file mode 100644 index 13549710237f51a5a9c84abf6272275396fff888..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_variance.bin deleted file mode 100644 index 8102a808657f0b45d3a2a959bb3793c24f0c14ca..0000000000000000000000000000000000000000 
Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_22_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_beta.bin deleted file mode 100644 index c396a8e2939c25d30b2021e6ca343913021309f3..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_gamma.bin deleted file mode 100644 index 0ee822b7e19677f3b7f7fcfce5456c2b1082efd7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_mean.bin deleted file mode 100644 index fbf6f4eac60ed424271646218cb74ddaa5d74104..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_variance.bin deleted file mode 100644 index d630a7ac1ecc23cfaeb1c88311dd6e5c6c4bbdbc..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_23_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_beta.bin deleted 
file mode 100644 index 3c70dadf33fe75b4e62ad704c6e4eebfe726792a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_gamma.bin deleted file mode 100644 index 09cd79dc17aea4d5c5b6c604248a81d929170e45..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_mean.bin deleted file mode 100644 index cbf013bcb470738d762c2cbda76745bf80ec765b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_variance.bin deleted file mode 100644 index 0039d0bad928dee087c70a587d0e5a843790e077..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_24_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_beta.bin deleted file mode 100644 index 0c9f7ae71b66a85ed843a45703717064be84a64c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_beta.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_gamma.bin deleted file mode 100644 index 8ae7623c12452151e9a4b100cd344f9b46121bab..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_mean.bin deleted file mode 100644 index 062398cda6d3315629ee845e1bdd7d4623bc7493..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_variance.bin deleted file mode 100644 index 0b5029b6aba8673c6fd7a9844c0feb4b8d7da490..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_25_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_beta.bin deleted file mode 100644 index 1edd9d65782ee53219b97efd095a0d31af296d06..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_gamma.bin deleted file mode 100644 index f9885c71b64218be5ce4187a9306e1869c41b5fc..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_mean.bin deleted file mode 100644 index 9d34da9b2aae4e306e7061e380168ac6bc0f7a00..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_variance.bin deleted file mode 100644 index 2bd6648fa7d61af054f9d36916cc1975f3f351ae..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_26_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_beta.bin deleted file mode 100644 index e6b513e4055d1394fe9eb9437b00864d570780aa..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_gamma.bin deleted file mode 100644 index 0349ab56289301dbc5d95375e0a553afb8cc8cf6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_mean.bin deleted file mode 100644 
index 8ae8e0fc3d161ef33ebd15cbdc620863332e8216..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_variance.bin deleted file mode 100644 index 602be2e5a92239d688e30a082d79f8bec599c27f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_27_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_beta.bin deleted file mode 100644 index c9af5d00060958d9ce8073e95c74483ba63bcbec..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_beta.bin +++ /dev/null @@ -1,2 +0,0 @@ -D=>½Ì-¿’?9½Ýà´9œÓj?ҒȾŽ…>%¾Ý ½–•Ó?Š?£¾Û'ã?’Z<>—Ö¿;N‹>âyh¾[ÿM?gÁU¼{³-¾¤¤=Ìr¾”öç¾65V¾Î\ʾ·C¾*\ ¾ -:`?™N<U”?~ZÈ?|•É> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_gamma.bin deleted file mode 100644 index 59b78a30bf741b86e7bcd8346981f76749c2a981..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_mean.bin deleted file mode 100644 index faa537236ff696e81e93fdcffef78e86c66ead9f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_mean.bin +++ 
/dev/null @@ -1 +0,0 @@ -©+4>뮂?zÌ?ÂÖ’?Ã?Ĥ§?˜Þ½,c@ܶ׾¨(µ¿)Á'?ßì6ÀI+¿©Ïª>X¼@BîÞ¿N‹²>Èo¿ú©ë¾Ý±,¿óØ^>ì"¢¾‚}4?r@¹B<ÀWÇJ¿}ª¾Ûi-Àôm“¾|__>Ý¿”áÙ¿ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_variance.bin deleted file mode 100644 index 9a9ec730a4aabf7b35e502daca5dfe0dbf113418..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_2_variance.bin +++ /dev/null @@ -1,2 +0,0 @@ -)s?á}†@à ’?°êû?ßn@sQb@ êR>‘¨üAÇ'¬> -°4@곂?2p•AYd?˜Ó?Ó9A#uAKI0?¬é>“˜P?‹¥’?>U>?ŒØ<>=VŽ?æ`4AŸ‡Aj•®?Aƒ[>»[Añ ö>¼\º?}I2?ÔF@ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_beta.bin deleted file mode 100644 index dfbcff725a71852e107a04917d0a65a3544604e5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_beta.bin +++ /dev/null @@ -1 +0,0 @@ -z½}=»V?Ÿ–Õ>2|Š>ƒÌ¾³ç“>÷훽îX<F—X?¼È½÷Ǿ¼G>ùg¾'x>àñ(>dá?]kß½};¨½ü’Ï=aƒP>þ᤾ùm?m–¾ S¼º„ûÇ=k;¨??|Úè=D@¾Võ*¾‚Ò >±öê:\ŸV>S*Ľâ¾äTI?áD>*¥>!)Ž>šÎ>âp>ñÞ»=‡,“=P¥[=©½tyW¼¤p¾é·J>J>ió<Ï->–Vµ½×õS>ƒ¼²>@ã#?ÿM*¾-cŒ½ª°h?^òý=ÔXš>kŸ;½Á5×=;ŠX»UE"? \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_gamma.bin deleted file mode 100644 index ded64a0e5a70a9155c377e8a8244b85f623dee46..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_gamma.bin +++ /dev/null @@ -1 +0,0 @@ -œÃ„? ·?¹?u? âz?¬~?ò”|?q?lŽƒ?‰ñ?z ‚?O€?ä €?Hå}?Ì€z?R?óå^?ƒ¬?ª:…?(è€?zp?ñvr?óœ]?Hƒ?Agƒ?¯[‚?é+€?&1/?ìè?ÒÞw?¥}?å]€?ç?JD~?Hƒ?o(~?6ñ'?/~?EÖ~?Õz?ÆBl?ä?.€?¢ø?9g?a°ƒ?›‚?n€??O?€?È‚?…`s?’‚? 
ß|?äv?ŸtT?L¦‚? ¤ˆ?ù;ï>èÜ€?-±v?êj?Æ#‡?ˆ×~?KSQ? \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_mean.bin deleted file mode 100644 index 058394e6ac8c95cec8fb6050daf47289e8c81b48..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_mean.bin +++ /dev/null @@ -1,2 +0,0 @@ -ø5¿›‚Ï?Žî–¾W„ÀÑŽ">}‚IÀ°ì½ÿÕ@¿©@‹ûÏ?&gü>ŒíÅ?Ã~Á>?34¿mëN?y’1?aŒ> ß¾ÞÀb„,À—qâ?j%„¿ -@ÓÀÜ|ÀÔ{™?ž´·?D @¬]î¾T„>tí¾¼#ˆ?Ôž–?qŽ¿öÂ:¿0Ž>¡Ji?ܽ¤¾Òá?õwʼöX¿™<¿åÀ7aD?Ê?°~²?ÿŒN¾8„å?ß ¿‡U?°Í§¾ß¬?§EP?€½JO?¡x@ÏcF@¦‹@Ù_@“Ô³¿bèà?–Ô¾ò÷¾0óµ¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_variance.bin deleted file mode 100644 index d5dba0a9275910fdded47a2604453ae46f611c16..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_3_variance.bin +++ /dev/null @@ -1 +0,0 @@ -:Â@ª÷í?P&å?{¯Œ@õŽw?ÉI@ÔûŒ?Ç—?Ó¨û?†AÆ>sÎ?Bˆ??þÙ?Ú’{?·è@ÀÖÆ?ç¹%@s½–?F.F@ƒªí?â¹®?A<@Ÿ?QN»?N?˧“?ÁÌ@ …ä>M&?³²?¬“G?³®¯?姹?×mt?®–â?œv?Ÿ/Ö?Z0?AÒG@ÄSµ?û<?Î É?*ëx?œLÒ?é5ƒ?ÃǪ?TÎ@«8(@0]?ß3@QÆ£?kä?»,?%!©?6ï?3or@¤û @ÌO?•m’?{Ÿ?“¨ô?fêZ?lg¿? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_beta.bin deleted file mode 100644 index 70ddacf8f0bd27523892f5af52ded3302c4715d4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_gamma.bin deleted file mode 100644 index 3f64ef0b25bb6e00a6012f360e65812d22ca672f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_gamma.bin +++ /dev/null @@ -1 +0,0 @@ -Ïeq?€¨¦?Þ?ÇK?SM?^ãp?°‰?Ð9„?Ñ—»?i”^? |?kf?¿áv?bÿb?í÷ƒ?”tÄ?°Ì1?)·?²ù‡?½W?2Et?½e{?ÊÝ‘?Ñu?ìzŠ?RC†?ßê?÷EV?/V?<m?9v?ª³(?:‰”??SÉ?ß½±?P36?z×Y?Þçs?å[?ªrb?^û:?QwC?XŽz?Ö‰c?Hó‚?6*g?ó“4?Ñ¡`?92’?,Œ?Ê€?"X?¡ße?+¸™?õk…?*#Ï?Ÿ+Ó?óz? Њ?oM?4Í“?Ó/?ç’Š? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_mean.bin deleted file mode 100644 index 28c78d2db90aadc66f0d1f7d647e32044fd12744..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_variance.bin deleted file mode 100644 index 8f361cbf915cd5fb93f32847280d50dad8e9b791..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_4_variance.bin +++ /dev/null @@ -1 +0,0 @@ -õäe>À(?nE?¨Ú>[½i>ÑO'?à(t?„kG?(‰d?Áxâ?òLÍ>Œi?ˆ`·>´ž?wÒo?•ï?LÙé?@¯„?ܽC?š5ˆ?Mâ,?Ã0Ð>4£Ô>©ËÀ>—}>¨`>„¶>ú]’?Ø@·)@šZ@ß5R@øÿF?Án¯>Á5ç>º}œ?à ?Ðÿ>ãš@R‚œ?¾Å—?fô>éž?²'^@!vÝ>2,?)©®>®}A§ß†?aåm@9\?¯Ä?S©…?ë´?ÆB‹?ƒµ>ªâb?´bW?ÈC?FH?ʇ›?߀?Uã? ?ˆ? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_beta.bin deleted file mode 100644 index 37161ae89f38c6489ae9ed0d99ad2df5a5f2f093..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_gamma.bin deleted file mode 100644 index efefd0af2fbdc436d3321906166debd0323c1571..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_mean.bin deleted file mode 100644 index 7eb215a96c6fb385ec761cf16be0339f3656b717..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_variance.bin deleted file mode 100644 index d220b9e27ad8cd5a7b4bcba39105c8ee969bc4f3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_5_variance.bin +++ /dev/null @@ -1,2 +0,0 @@ -Oús?×_?,Ì?´È^?’ŽÊ?ôó7?öÌ@®‘Æ?ü9ž?v©Ž?Àeø?±\›?)9?S\U?¥B¼?ú¸’?ºc¤?Qúì?øZ3?'?o?š|?þ&¾?–_’?µ¼€?1Ë?+*O?%‹?g» -?ØR?)Í>?ëD“?غÿ>UŽ?#®6?ªº‰?)·;?wÙ’?0®]?çjg?-B?Aâ?RÌ?öŠI?[O9?fBB?Ý[?ôl?âF?ª´º?éË’?µ”@µ?È‚?\h@?µ¾'?D?Jx?°Ç(@ë,§?É%?áæD?Éa?j¥„?: 
:?‡Ò«?!·q?çò¤?FG+?³wY?;j?ÇSe?h>Â?>¬?õJ‡?¥GÎ?É”1?ôŸÇ@l)’?Ç`?ë.V?J+?'çü?H«Ø?)à©?™?p?mr?ôÈA?vª?ÿ²š?æJŸ?›5?lK?g.†?•bŠ?O{ˆ?3Tk?µY?\hœ?òa?®Pt?JM%?^?êC?êtª?ÏŸ?”µ?ÞuT?B–?8w?S‰?p4›?XR?âf@?ïÄO?NÙk?ðɉ?Ô?´*?l;r?üüØ?<¡{?°×>>¢?Y‹?e8€?‘Њ?!? \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_beta.bin deleted file mode 100644 index 39a7a8779dc5ba6a394748a88391fbbf8b35ec23..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_gamma.bin deleted file mode 100644 index 8bfc97196078b732c1ab61e8a3bbb656d29d3728..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_gamma.bin +++ /dev/null @@ -1,2 +0,0 @@ -–¼‹?B„?çd?Ó5?Mn?p¶I?¤éº?«›g?µy?é˜?·P\?œF?ˆ5?*ç:?íR‡?+Q\?&ˆ?µqN?! -G?ºU?üŠ? nt?òÂ(?w£?g›{?•óH?,G‘?Ó’?ÎÛŽ?Ɔ?Ç>’?<‚ƒ?2¿›?˜ºx?˯<?€Z7?Ó!?¾ç}?$‰?¼æp?ÎT?þrR?ç´›?WŽJ?µ’?]>c?ùQ?-±‰?îÓ`?¦”Y?”Úp?TÉt?|ˆ‚?óA?²‚?‹}‡?¬'j?9Î?¾N‡?`ŽW?FË?€z?X{?Áœ5?©«o?yS.?ê{?FÆ8? µ›??Ï~?ÛæT?_)I?+Yw?õãw?D~?Bû?Ñ"Ò?†?Âsƒ?>ÞW?>6?¼¶¤?zûD?Úé†?³ˆ?ŸM?à`?G{'?¼¾–?=Â]?Ú€~?»R˜?˜®ˆ?ßMz?Ÿdu?͈U?œö1?£Ü–?ž,„?\ÓŠ?<Vf?{8)?“&o?ݬˆ?t~—?ïï•?ôÆ”?D…?X/)?`Ú?@œ ?cj?-ƒ?“?¿Às?hím?o?åÅ—?Æ+‘?ùW‚?²F?ßø‚?>F?@–m?¥m?߯?ì‚?ì3? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_mean.bin deleted file mode 100644 index f427d142f3bf2147d302426700b2f0ee817ec308..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_variance.bin deleted file mode 100644 index 4c571acca77f147260874e9ae0ff1722076746ca..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_6_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_beta.bin deleted file mode 100644 index 4e72081f35c879ebc0d0bc57e3ced79a81200854..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_gamma.bin deleted file mode 100644 index e8ac9fe5f793a80b78c9a2099d37a96d093097ba..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_gamma.bin +++ /dev/null @@ -1,2 +0,0 @@ -ORa?Œ^€?o'€?Tpy?b‘F?ø,~?¬£?€ž€?B‹u?ÿœ€?g“z?²€?d}?j~?_gg?o?šÏf?.:k?#*\?Ò€?û;o?Ïö?L€?Yy?ÁR€?Zíp?lVq?¸€?GXZ?±Ä?Ñ€?—M~?¯¼q?8|€?›Dk?j?6ØT?\,€?¦€?®†?_?Öþ|?wòl?jSn?«€?qè?”Zn?!ªv?€ƒr?æ\v? „~?z]s?”€?¯*x?|U_?N€?å*?üU€?FÚu?ïÇn?«T?á/y?”™€?ÄóS?*ÿq?L4€?Su?Ãq?q`w?´f?Z2~?È)?Ø<€?x€?0¥~?0;e?Být?tß~?¸S{? 
þ?+T€?{á?<†~?[? -C~?»@?81}?€‹?$€?ƒ»?Z\}?/ÉV?T·~?4|?Lr?¶‹d?ˆ i? Úg?þ„€?âL?¼wu?›€?¾Ù{?« i?à6€?«Úk?–•v?Èyn?×d?X¹j?À?Ód?¸º~?‘s?Íá?#Çl?øg}?"I€?åNx?wÕ?ú˜x?äe|?‰dr?Ö/€?zw?À|€?ïlu?Øn~? \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_mean.bin deleted file mode 100644 index 42ec4b5d965a8dc26c8d6218195e1c87739fb9fa..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_variance.bin deleted file mode 100644 index 17911f473710c3e37246c1de1a4121be21585ee2..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_7_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_beta.bin deleted file mode 100644 index c3b0b374f59d9c906906d51621a99704e26ed422..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_gamma.bin deleted file mode 100644 index b271fb02201a3f354162e281cf1bac5998ed28a2..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_mean.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_mean.bin deleted file mode 100644 index 5888235eb54a3c9ad548a51708eb39c13d7e8ddd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_mean.bin +++ /dev/null @@ -1,3 +0,0 @@ -N¡>Ða¿éNR¿Þ0¾> Ô>é«8=J_¿í™ -¿À0?*ö!¿«£½>Ï¿VhE¿„«>Ð¥>˜¸Ë>S{„>U¼> Çt>?+¿}¼>ØÀ>mÈ¿ŒOT¿]+¿9½>k„Ú>Gã`=Õ?E>o’ƒ¾É“7¿6ã<¾~Ä>N÷¿ýÄ>ÝŠ[¿È½ >2\¿7Ò.¿77e¿y߶>ŠÁÏ>°|¸>ßRÊ>… ¿™e¡½±#>˜"µ>t´>NÚ>¯¾*°?SÏ¿6g×>dX^>M¿=í¿§…×>×ʼ>ÍÕ>l¨>ÙqC¿™gS>‹¤>b+¿g¿Þ>sº½>3È -?&,©>Èâ¸>p‹¿u†¾Éß+¿ÎÂÍ>jÑU>èÞr>P>½ûä?áØ2¿ñ)¿Ñ ¿ÜP¿D½‚>Ú_¿çã&>ø|²> 9¿$6 ?Z3=ÍYL¿#’>˜3*¿©ö;ÐOŽ>œô›>¸ŠŠ>?*¥>¹ÑS¿Îz˼Bð>±ï<¿aC<~®>/ªI¿”vº>œ¸>¨á¬>GÀ>-Y¯>'¿™>Æw.¿Áî¥>½s3¿Ö8®>e“Ñ>¼¿ï¯‚>gCV½À|ç>óhá>¼æÀ>Sç:¿:Á>Ó¿˜ý¨>…±¿ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_variance.bin deleted file mode 100644 index be017b25adccfc236b22789abd11b0ff50fb5a40..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_8_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_beta.bin deleted file mode 100644 index 13e7e2a820d8c80f79e05b91540c0d5493387306..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_gamma.bin deleted file mode 100644 index 4d65230c8dc292bceb2414527469eca65674af13..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_mean.bin deleted file mode 100644 index 67b8b25e4fff4232001931073a803f3dfe363187..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_variance.bin deleted file mode 100644 index 59a0b1e0d59434dfb9d94f4cefdcfab4cdec0b93..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/batch_normalization_9_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_10_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_10_w.bin deleted file mode 100644 index 2000dbf19acd71e28da72db217f7f34d80be4d55..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_10_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_11_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_11_w.bin deleted file mode 100644 index e38c7f59fa6346b7a4c1c2e676cec648277986aa..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_11_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_12_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_12_w.bin deleted file mode 100644 index fd7b6121bdd5b28f0c65caec9e90676d9ccc2171..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_12_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_13_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_13_w.bin deleted file mode 100644 index 2a6a844fa8e1ee98017c3d1e3a9024f39c6f1568..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_13_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_14_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_14_w.bin deleted file mode 100644 index ff22cedb2ef6ef7aaffbf434d5dae78cf813de27..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_14_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_1_w.bin deleted file mode 100644 index bafe4f5ad48926ac6a00086e2e9ce2cda85bd9ec..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_2_w.bin deleted file mode 100644 index eff0fc063670e2a30c86b70b2611787f454db6fb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_3_w.bin deleted file mode 100644 index e09cda44638fd9f0032b47d6f5fc7ece69cd24b8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_3_w.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_4_w.bin deleted file mode 100644 index ce941bc4965f21e57f6b6cab24639d8bab593b6e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_5_w.bin deleted file mode 100644 index 12a7e35468d1d003b9f65b4a515f82c4a2f42ca6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_6_w.bin deleted file mode 100644 index 15c80714155c176c53788c7a4926ae90d6a50a54..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_7_w.bin deleted file mode 100644 index aabaa5eb3ce76dba62573d51d7b63d037df1ce82..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_7_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_8_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_8_w.bin deleted file mode 100644 index ad954d098872fcf34792606a50d7e46c6a0008c6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_8_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_9_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_9_w.bin deleted file mode 100644 index 
50ea54350fc605740424c8b6e5a48cbe7846181b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/conv2d_9_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_b.bin deleted file mode 100644 index 1e697e20d8008cba5750a47aa9a53d8b29b1b0e2..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_w.bin deleted file mode 100644 index 9105f0e8d7739016cce69125dee5e8102d67c8d8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/dense_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_10_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_10_w.bin deleted file mode 100644 index f7cbc07e8ef10d1c910e8cb8e0880a263f944d4e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_10_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_11_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_11_w.bin deleted file mode 100644 index c9fb2daae05c1272ee93cf8dfd817e08591834e1..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_11_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_12_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_12_w.bin deleted file mode 100644 index 58c263417c0669304fff4416cd7c45dc001d4f81..0000000000000000000000000000000000000000 
Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_12_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_13_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_13_w.bin deleted file mode 100644 index 36d45717f5a1435df7c2cecca1353ca326ea98f9..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_13_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_1_w.bin deleted file mode 100644 index 0224a1a1465811bf5768565cc637a9757e8db9c2..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_2_w.bin deleted file mode 100644 index 33c3af23f2fee0a9bd871d3e95c26d17b7108c29..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_3_w.bin deleted file mode 100644 index 1bcfbd7df4591bde2936e7ccfa9b1f10cf9f0d1e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_4_w.bin deleted file mode 100644 index 49a61f541371dd83a76c5efa90cd9ec3eaa13de0..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_5_w.bin deleted file mode 100644 index d488d6077e6a7e13a9bf8fbd9eb67fa735d6befe..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_6_w.bin deleted file mode 100644 index 7ab35e18d4824343230e241e3c6ecfcc20b57b83..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_7_w.bin deleted file mode 100644 index 569a5573a4f9a5a3f7fb87361b30f361abcff2cb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_7_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_8_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_8_w.bin deleted file mode 100644 index 10dc6502f6d0c128cdeae1fd07359be2bc500981..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_8_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_9_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_9_w.bin deleted file mode 100644 index 9112cb3cc2eb816e5e3592b00cd331c23b185b1d..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/depthwise_conv2d_9_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/input.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/input.bin deleted file mode 100644 index 86390e39e0f8515d52ca6d5ab99b98af7d72b93c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels.bin deleted file mode 100644 index 72e2c6650e2d717f25484f9f67068be084e7f175..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels32.bin deleted file mode 100644 index 870f85ff4802d369b0db3bf334ba566338f683a1..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layer_composition.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layer_composition.txt deleted file mode 100644 index 10692997a90e4490a91ad3d0e6e04285754144fd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layer_composition.txt +++ /dev/null @@ -1,83 +0,0 @@ -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - -activation - - -activation -conv - 
-activation - - -activation -conv - -activation -pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layers.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layers.txt deleted file mode 100644 index 0bd2b554374c10d748a652f52e5427c716be0084..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/layers.txt +++ /dev/null @@ -1,83 +0,0 @@ -Conv1,10000,3,32,32,32,3,3,3 -#tensorBatchNorm1 -#tensorRelu1 -#tensorDepthwiseConv1 -#tensorBatchNorm2 -#tensorRelu2 -Conv2,10000,32,32,32,64,32,1,1 -#tensorBatchNorm3 -#tensorRelu3 -#tensorDepthwiseConv2 -#tensorBatchNorm4 -#tensorRelu4 -Conv3,10000,64,16,16,128,64,1,1 -#tensorBatchNorm5 -#tensorRelu5 -#tensorDepthwiseConv3 -#tensorBatchNorm6 -#tensorRelu6 -Conv4,10000,128,16,16,128,128,1,1 -#tensorBatchNorm7 -#tensorRelu7 -#tensorDepthwiseConv4 -#tensorBatchNorm8 -#tensorRelu8 -Conv5,10000,128,8,8,256,128,1,1 -#tensorBatchNorm9 -#tensorRelu9 -#tensorDepthwiseConv5 -#tensorBatchNorm10 -#tensorRelu10 -Conv6,10000,256,8,8,256,256,1,1 -#tensorBatchNorm11 -#tensorRelu11 -#tensorDepthwiseConv6 -#tensorBatchNorm12 -#tensorRelu12 -Conv7,10000,256,4,4,512,256,1,1 -#tensorBatchNorm13 -#tensorRelu13 -#tensorDepthwiseConv7 -#tensorBatchNorm14 -#tensorRelu14 -Conv8,10000,512,4,4,512,512,1,1 -#tensorBatchNorm15 -#tensorRelu15 -#tensorDepthwiseConv8 -#tensorBatchNorm16 -#tensorRelu16 -Conv9,10000,512,4,4,512,512,1,1 -#tensorBatchNorm17 -#tensorRelu17 -#tensorDepthwiseConv9 -#tensorBatchNorm18 -#tensorRelu18 -Conv10,10000,512,4,4,512,512,1,1 -#tensorBatchNorm19 -#tensorRelu19 -#tensorDepthwiseConv10 -#tensorBatchNorm20 -#tensorRelu20 -Conv11,10000,512,4,4,512,512,1,1 -#tensorBatchNorm21 -#tensorRelu21 -#tensorDepthwiseConv11 -#tensorBatchNorm22 -#tensorRelu22 -Conv12,10000,512,4,4,512,512,1,1 -#tensorBatchNorm23 -#tensorRelu23 -#tensorDepthwiseConv12 -#tensorBatchNorm24 -#tensorRelu24 -Conv13,10000,512,2,2,1024,512,1,1 -#tensorBatchNorm25 -#tensorRelu25 
-#tensorDepthwiseConv13 -#tensorBatchNorm26 -#tensorRelu26 -Conv14,10000,1024,2,2,1024,1024,1,1 -#tensorBatchNorm27 -#tensorRelu27 -#tensorPooling1 -FC1,10000,1024,1024,10 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_layers.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_layers.txt deleted file mode 100644 index c2a4a29509ad89724905c869ff900f8ecaa5bf8c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_layers.txt +++ /dev/null @@ -1,83 +0,0 @@ -Conv1,10000,3,32,32,32,3,3,3 -NML1 -NML2 -NML3 -NML4 -NML5 -Conv3,10000,32,32,32,64,32,1,1 -NML6 -NML7 -NML8 -NML9 -NML10 -Conv5,10000,64,16,16,128,64,1,1 -NML11 -NML12 -NML13 -NML14 -NML15 -Conv7,10000,128,16,16,128,128,1,1 -NML16 -NML17 -NML18 -NML19 -NML20 -Conv9,10000,128,8,8,256,128,1,1 -NML21 -NML22 -NML23 -NML24 -NML25 -Conv11,10000,256,8,8,256,256,1,1 -NML26 -NML27 -NML28 -NML29 -NML30 -Conv13,10000,256,4,4,512,256,1,1 -NML31 -NML32 -NML33 -NML34 -NML35 -Conv15,10000,512,4,4,512,512,1,1 -NML36 -NML37 -NML38 -NML39 -NML40 -Conv17,10000,512,4,4,512,512,1,1 -NML41 -NML42 -NML43 -NML44 -NML45 -Conv19,10000,512,4,4,512,512,1,1 -NML46 -NML47 -NML48 -NML49 -NML50 -Conv21,10000,512,4,4,512,512,1,1 -NML51 -NML52 -NML53 -NML54 -NML55 -Conv23,10000,512,4,4,512,512,1,1 -NML56 -NML57 -NML58 -NML59 -NML60 -Conv25,10000,512,2,2,1024,512,1,1 -NML61 -NML62 -NML63 -NML64 -NML65 -Conv27,10000,1024,2,2,1024,1024,1,1 -NML66 -NML67 -NML68 -FC1,10000,1024,1024,10 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_ops.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_ops.txt deleted file mode 100644 index 8e18f2ec58cddb9ab0251229b1e908b23b71d6bc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/mobilenet_ops.txt +++ /dev/null @@ -1,165 +0,0 @@ -#Conv1,1 -Conv1 -#NML1,1 -BatchNorm1 -#NML2,1 -Relu1 -#NML3,1 -Conv2 -#NML4,1 -BatchNorm2 -#NML5,1 
-Relu2 -#Conv3,1 -Conv3 -#NML6,1 -BatchNorm3 -#NML7,1 -Relu3 -#NML8,1 -Conv4 -#NML9,1 -BatchNorm4 -#NML10,1 -Relu4 -#Conv5,1 -Conv5 -#NML11,1 -BatchNorm5 -#NML12,1 -Relu5 -#NML13,1 -Conv6 -#NML14,1 -BatchNorm6 -#NML15,1 -Relu6 -#Conv7,1 -Conv7 -#NML16,1 -BatchNorm7 -#NML17,1 -Relu7 -#NML18,1 -Conv8 -#NML19,1 -BatchNorm8 -#NML20,1 -Relu8 -#Conv9,1 -Conv9 -#NML21,1 -BatchNorm9 -#NML22,1 -Relu9 -#NML23,1 -Conv10 -#NML24,1 -BatchNorm10 -#NML25,1 -Relu10 -#Conv11,1 -Conv11 -#NML26,1 -BatchNorm11 -#NML27,1 -Relu11 -#NML28,1 -Conv12 -#NML29,1 -BatchNorm12 -#NML30,1 -Relu12 -#Conv13,1 -Conv13 -#NML31,1 -BatchNorm13 -#NML32,1 -Relu13 -#NML33,1 -Conv14 -#NML34,1 -BatchNorm14 -#NML35,1 -Relu14 -#Conv15,1 -Conv15 -#NML36,1 -BatchNorm15 -#NML37,1 -Relu15 -#NML38,1 -Conv16 -#NML39,1 -BatchNorm16 -#NML40,1 -Relu16 -#Conv17,1 -Conv17 -#NML41,1 -BatchNorm17 -#NML42,1 -Relu17 -#NML43,1 -Conv18 -#NML44,1 -BatchNorm18 -#NML45,1 -Relu18 -#Conv19,1 -Conv19 -#NML46,1 -BatchNorm19 -#NML47,1 -Relu19 -#NML48,1 -Conv20 -#NML49,1 -BatchNorm20 -#NML50,1 -Relu20 -#Conv21,1 -conv21 -#NML51,1 -BatchNorm21 -#NML52,1 -Relu21 -#NML53,1 -Conv22 -#NML54,1 -BatchNorm22 -#NML55,1 -Relu22 -#Conv23,1 -Conv23 -#NML56,1 -BatchNorm23 -#NML57,1 -Relu23 -#NML58,1 -Conv24 -#NML59,1 -BatchNorm24 -#NML60,1 -Relu24 -#Conv25,1 -Conv25 -#NML61,1 -BatchNorm25 -#NML62,1 -Relu25 -#NML63,1 -Conv26 -#NML64,1 -BatchNorm26 -#NML65,1 -Relu26 -#Conv27,1 -Conv27 -#NML66,1 -BatchNorm27 -#NML67,1 -Relu27 -#NML68,1 -Pool1 -FC1,10000,1024,1024,10 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/promise_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/promise_src.cc deleted file mode 100644 index 146bc640cc4b1e8da65e3e7bb6cb5c7f2a007399..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/promise_src.cc +++ /dev/null @@ -1,420 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include 
<sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - -int total_runs = 100; -for (int i = 0 ; i < total_runs; i++){ - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - - - -std::string dir_prefix = std::string("data/mobilenet_quant/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* 
batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + 
std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = 
readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string 
batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + 
std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* 
batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); -void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); -void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); -void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); -void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); -void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); -void* batch_normalization_15_gamma = 
readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); -void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); -void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); -void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); -void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); -void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); -void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); -void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); -void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 
0,512,512,1,1); -std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); -void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); -void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); -void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); -void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); -void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); -void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); -void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); -void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); -void* batch_normalization_18_variance = 
readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); -void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); -void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); -void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); -void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); -void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); -void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); -void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); -void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 
0,1,512,1,1); -std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); -void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); -void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); -void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); -void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); -void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); -void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); -void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); -void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); -std::string 
batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); -void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); -void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); -void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); -void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); -void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); -void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); -void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); -void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_beta_path = dir_prefix + 
std::string("batch_normalization_24_beta.bin"); -void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); -void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); -void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); -std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); -void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); -void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); -void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); -void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); -std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); -void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); -std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); -void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); -void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); -void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); -std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); -void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); -void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); -void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); -void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = 
readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); -void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); -void* var_2 = tensorRelu(var_1); -void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); -void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); -void* var_5 = tensorRelu(var_4); -void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); -void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); -void* var_8 = tensorRelu(var_7); -void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); -void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); -void* var_11 = tensorRelu(var_10); -void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); -void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, 
batch_normalization_5_mean, batch_normalization_5_variance, 0.001); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); -void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); -void* var_17 = tensorRelu(var_16); -void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); -void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); -void* var_20 = tensorRelu(var_19); -void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); -void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); -void* var_23 = tensorRelu(var_22); -void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); -void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); -void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); -void* var_29 = tensorRelu(var_28); -void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); -void* var_31 = tensorBatchNorm(var_30, 
batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); -void* var_32 = tensorRelu(var_31); -void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); -void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); -void* var_35 = tensorRelu(var_34); -void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); -void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); -void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); -void* var_41 = tensorRelu(var_40); -void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); -void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); -void* var_44 = tensorRelu(var_43); -void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); -void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); -void* var_47 = tensorRelu(var_46); -void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, 
-3.0287702755928043, 2.9487365779876953, 9); -void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); -void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); -void* var_53 = tensorRelu(var_52); -void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); -void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); -void* var_56 = tensorRelu(var_55); -void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); -void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); -void* var_59 = tensorRelu(var_58); -void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); -void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); -void* var_62 = tensorRelu(var_61); -void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); -void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); -void* var_65 = tensorRelu(var_64); -void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, 
conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); -void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); -void* var_68 = tensorRelu(var_67); -void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); -void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); -void* var_71 = tensorRelu(var_70); -void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); -void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); -void* var_74 = tensorRelu(var_73); -void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); -void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); -void* var_77 = tensorRelu(var_76); -void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); -void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); -void* var_80 = tensorRelu(var_79); -void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2); -void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); -void* 
var_83 = tensorSoftmax(var_82); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_83); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -} - -dumpExecutionAccuracies(); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt deleted file mode 100644 index 9ea66b8485dc19a8f2f9abfc5981e023f22ce521..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt +++ /dev/null @@ -1,15 +0,0 @@ --1.9892114 2.126797 -2.19630692005 1.34758170414 0.0 0.0 -60.892750473 51.9925691605 -0.0 5.71354155397 -0.931772116065 1.07742589378 0.0 0.0 -6.51858950329 6.81084251881 -0.0 4.93213940287 -0.531654466152 0.57537904036 0.0 0.0 -4.48263123512 3.96730119753 -0.0 4.10326339769 -0.362340988219 0.407691390038 0.0 0.0 -4.04261828327 3.8867793293 -0.0 5.38322130251 -0.313120054901 0.293576799393 0.0 0.0 -5.92146921539 4.33867932415 -0.0 4.31673815441 -0.232992478013 0.258029025793 0.0 0.0 -4.20778994751 3.93243697071 -0.0 5.8304081068 -0.202337772191 0.189983081758 0.0 0.0 -6.29828691578 4.84813511753 -0.0 4.44641780996 -0.174427356511 0.176958308667 0.0 0.0 -4.34791088581 3.61443646955 -0.0 4.5180956049 -0.145467961878 0.15256431669 0.0 0.0 -3.02877027559 2.94873657799 -0.0 6.34857563496 -0.130258745223 0.135582433432 0.0 0.0 -4.22931008053 3.53150463724 -0.0 5.22100311041 -0.119001727596 0.125363747835 0.0 0.0 -4.03820378017 4.00400940704 -0.0 5.73249834776 -0.108397216856 0.116256686077 0.0 0.0 -3.31110151148 4.46293323326 -0.0 7.24049821186 -0.0862374496162 0.0885944995135 0.0 0.0 -4.17543139458 6.2043294754 -0.0 7.81395883465 -0.0681302513927 0.0700202777982 0.0 0.0 -10.9205664234 2.64429125786 -0.0 2.86920666504 
-0.223010196954 0.14426593782 -0.1654396 0.23336112 -12.2459499588 23.8053251343 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/src.cc deleted file mode 100644 index 25aec9bde3bc1aac157e2acc368dcddf866e455d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet/src.cc +++ /dev/null @@ -1,413 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("data/mobilenet_quant/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix 
+ std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = 
readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string 
batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + 
std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = 
readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 
0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = 
dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); -void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); -void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); -void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); -void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); -void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_8_w_path = dir_prefix + 
std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); -void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); -void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); -void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); -void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); -void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); -void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); -void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); -void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); -void* 
batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); -void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); -void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); -void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); -void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); -void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); -void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); -void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); -void* batch_normalization_18_mean = 
readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); -void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); -void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); -void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); -void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); -void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); -void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); -void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); -void* batch_normalization_20_beta = 
readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); -void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); -void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); -void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); -void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); -void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); -void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); -void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); -void* batch_normalization_22_gamma = 
readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); -void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); -void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); -void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); -void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); -void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); -void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); -void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); -void* depthwise_conv2d_12_w = 
readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); -void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); -void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); -void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); -void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); -std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); -void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); -void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); -void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); -void* batch_normalization_25_variance = 
readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); -std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); -void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); -std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); -void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); -void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); -void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); -void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); -std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); -void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); -void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); -void* batch_normalization_27_mean = 
readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); -void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); -void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); -void* var_2 = tensorRelu(var_1); -void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); -void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); -void* var_6 = tensorRelu(var_5); -void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); -void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); -void* var_9 = tensorRelu(var_8); -void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); -void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, 
batch_normalization_4_variance, 0.001); -void* var_13 = tensorRelu(var_12); -void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); -void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); -void* var_16 = tensorRelu(var_15); -void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); -void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); -void* var_20 = tensorRelu(var_19); -void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); -void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); -void* var_23 = tensorRelu(var_22); -void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); -void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); -void* var_28 = tensorRelu(var_27); -void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); -void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); -void* var_31 = tensorRelu(var_30); -void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); -void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); -void* var_35 = tensorRelu(var_34); -void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); -void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 
0.001); -void* var_38 = tensorRelu(var_37); -void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); -void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); -void* var_43 = tensorRelu(var_42); -void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); -void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); -void* var_46 = tensorRelu(var_45); -void* var_48 = tensorConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); -void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); -void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); -void* var_53 = tensorRelu(var_52); -void* var_55 = tensorConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); -void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); -void* var_57 = tensorRelu(var_56); -void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); -void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); -void* var_60 = tensorRelu(var_59); -void* var_63 = tensorConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); -void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 
0.001); -void* var_65 = tensorRelu(var_64); -void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); -void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); -void* var_68 = tensorRelu(var_67); -void* var_70 = tensorConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); -void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); -void* var_72 = tensorRelu(var_71); -void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); -void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); -void* var_75 = tensorRelu(var_74); -void* var_77 = tensorConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); -void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); -void* var_79 = tensorRelu(var_78); -void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); -void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); -void* var_82 = tensorRelu(var_81); -void* var_85 = tensorConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); -void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); -void* var_87 = tensorRelu(var_86); -void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); -void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-void* var_90 = tensorRelu(var_89); -void* var_92 = tensorConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); -void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); -void* var_94 = tensorRelu(var_93); -void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); -void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); -void* var_97 = tensorRelu(var_96); -void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); -void* var_101 = tensorGemmGPU(var_99, dense_1_w); -void* var_102 = tensorAdd(var_101, dense_1_b); -void* var_103 = tensorSoftmax(var_102); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_103); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/approxhpvm_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/approxhpvm_src.cc deleted file mode 100644 index dc0c873c63333299981591cb5654cb38be9d4092..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/approxhpvm_src.cc +++ /dev/null @@ -1,1224 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void 
var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 32); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1) { - 
__visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 64); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 128); - __visc__return(2, r, (size_t) 0); 
-} - -void var_16_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_17_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_20_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 128); - __visc__return(2, r, (size_t) 0); -} - -void var_22_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_23_node(void* t1, size_t bytes_t1) { - 
__visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_26_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 256); - __visc__return(2, r, (size_t) 0); -} - -void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_29_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void 
var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_32_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_33_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 256); - __visc__return(2, r, (size_t) 0); -} - -void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_35_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_36_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(5, t1, t2, t3, t4, t5, 0); - - void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); - __visc__return(2, r, (size_t) 0); -} - -void var_38_node(void* t1, size_t bytes_t1) { - 
__visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_39_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_avg(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_41_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_42_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* batch_normalization_1_gamma, size_t batch_normalization_1_gamma_bytes, - void* batch_normalization_1_beta, size_t batch_normalization_1_beta_bytes, - void* batch_normalization_1_mean, size_t batch_normalization_1_mean_bytes, - void* batch_normalization_1_variance, size_t batch_normalization_1_variance_bytes, - void* depthwise_conv2d_1_w, size_t depthwise_conv2d_1_w_bytes, - void* batch_normalization_2_gamma, size_t batch_normalization_2_gamma_bytes, - void* batch_normalization_2_beta, size_t batch_normalization_2_beta_bytes, - void* batch_normalization_2_mean, size_t batch_normalization_2_mean_bytes, - void* batch_normalization_2_variance, size_t batch_normalization_2_variance_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* batch_normalization_3_gamma, size_t batch_normalization_3_gamma_bytes, - void* batch_normalization_3_beta, size_t 
batch_normalization_3_beta_bytes, - void* batch_normalization_3_mean, size_t batch_normalization_3_mean_bytes, - void* batch_normalization_3_variance, size_t batch_normalization_3_variance_bytes, - void* depthwise_conv2d_2_w, size_t depthwise_conv2d_2_w_bytes, - void* batch_normalization_4_gamma, size_t batch_normalization_4_gamma_bytes, - void* batch_normalization_4_beta, size_t batch_normalization_4_beta_bytes, - void* batch_normalization_4_mean, size_t batch_normalization_4_mean_bytes, - void* batch_normalization_4_variance, size_t batch_normalization_4_variance_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* batch_normalization_5_gamma, size_t batch_normalization_5_gamma_bytes, - void* batch_normalization_5_beta, size_t batch_normalization_5_beta_bytes, - void* batch_normalization_5_mean, size_t batch_normalization_5_mean_bytes, - void* batch_normalization_5_variance, size_t batch_normalization_5_variance_bytes, - void* depthwise_conv2d_3_w, size_t depthwise_conv2d_3_w_bytes, - void* batch_normalization_6_gamma, size_t batch_normalization_6_gamma_bytes, - void* batch_normalization_6_beta, size_t batch_normalization_6_beta_bytes, - void* batch_normalization_6_mean, size_t batch_normalization_6_mean_bytes, - void* batch_normalization_6_variance, size_t batch_normalization_6_variance_bytes, - void* conv2d_4_w, size_t conv2d_4_w_bytes, - void* batch_normalization_7_gamma, size_t batch_normalization_7_gamma_bytes, - void* batch_normalization_7_beta, size_t batch_normalization_7_beta_bytes, - void* batch_normalization_7_mean, size_t batch_normalization_7_mean_bytes, - void* batch_normalization_7_variance, size_t batch_normalization_7_variance_bytes, - void* depthwise_conv2d_4_w, size_t depthwise_conv2d_4_w_bytes, - void* batch_normalization_8_gamma, size_t batch_normalization_8_gamma_bytes, - void* batch_normalization_8_beta, size_t batch_normalization_8_beta_bytes, - void* batch_normalization_8_mean, size_t batch_normalization_8_mean_bytes, - void* 
batch_normalization_8_variance, size_t batch_normalization_8_variance_bytes, - void* conv2d_5_w, size_t conv2d_5_w_bytes, - void* batch_normalization_9_gamma, size_t batch_normalization_9_gamma_bytes, - void* batch_normalization_9_beta, size_t batch_normalization_9_beta_bytes, - void* batch_normalization_9_mean, size_t batch_normalization_9_mean_bytes, - void* batch_normalization_9_variance, size_t batch_normalization_9_variance_bytes, - void* depthwise_conv2d_5_w, size_t depthwise_conv2d_5_w_bytes, - void* batch_normalization_10_gamma, size_t batch_normalization_10_gamma_bytes, - void* batch_normalization_10_beta, size_t batch_normalization_10_beta_bytes, - void* batch_normalization_10_mean, size_t batch_normalization_10_mean_bytes, - void* batch_normalization_10_variance, size_t batch_normalization_10_variance_bytes, - void* conv2d_6_w, size_t conv2d_6_w_bytes, - void* batch_normalization_11_gamma, size_t batch_normalization_11_gamma_bytes, - void* batch_normalization_11_beta, size_t batch_normalization_11_beta_bytes, - void* batch_normalization_11_mean, size_t batch_normalization_11_mean_bytes, - void* batch_normalization_11_variance, size_t batch_normalization_11_variance_bytes, - void* depthwise_conv2d_6_w, size_t depthwise_conv2d_6_w_bytes, - void* batch_normalization_12_gamma, size_t batch_normalization_12_gamma_bytes, - void* batch_normalization_12_beta, size_t batch_normalization_12_beta_bytes, - void* batch_normalization_12_mean, size_t batch_normalization_12_mean_bytes, - void* batch_normalization_12_variance, size_t batch_normalization_12_variance_bytes, - void* conv2d_7_w, size_t conv2d_7_w_bytes, - void* batch_normalization_13_gamma, size_t batch_normalization_13_gamma_bytes, - void* batch_normalization_13_beta, size_t batch_normalization_13_beta_bytes, - void* batch_normalization_13_mean, size_t batch_normalization_13_mean_bytes, - void* batch_normalization_13_variance, size_t batch_normalization_13_variance_bytes, - void* dense_1_w, size_t 
dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(68, input, conv2d_1_w, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, depthwise_conv2d_1_w, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, conv2d_2_w, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, depthwise_conv2d_2_w, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, conv2d_3_w, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, depthwise_conv2d_3_w, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, conv2d_4_w, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, depthwise_conv2d_4_w, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, conv2d_5_w, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, depthwise_conv2d_5_w, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, conv2d_6_w, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, depthwise_conv2d_6_w, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, conv2d_7_w, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, dense_1_w, dense_1_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 
0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); - __visc__bindIn(var_1, 6, 4, 0); - __visc__bindIn(var_1, 7, 5, 0); - __visc__bindIn(var_1, 8, 6, 0); - __visc__bindIn(var_1, 9, 7, 0); - __visc__bindIn(var_1, 10, 8, 0); - __visc__bindIn(var_1, 11, 9, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - __visc__bindIn(var_3, 12, 2, 0); - __visc__bindIn(var_3, 13, 3, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 14, 2, 0); - __visc__bindIn(var_4, 15, 3, 0); - __visc__bindIn(var_4, 16, 4, 0); - __visc__bindIn(var_4, 17, 5, 0); - __visc__bindIn(var_4, 18, 6, 0); - __visc__bindIn(var_4, 19, 7, 0); - __visc__bindIn(var_4, 20, 8, 0); - __visc__bindIn(var_4, 21, 9, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - __visc__bindIn(var_6, 22, 2, 0); - __visc__bindIn(var_6, 23, 3, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - __visc__bindIn(var_7, 24, 2, 0); - __visc__bindIn(var_7, 25, 3, 0); - __visc__bindIn(var_7, 26, 4, 0); - __visc__bindIn(var_7, 27, 5, 0); - __visc__bindIn(var_7, 28, 6, 0); - __visc__bindIn(var_7, 29, 7, 0); - 
__visc__bindIn(var_7, 30, 8, 0); - __visc__bindIn(var_7, 31, 9, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - - void* var_9 = __visc__createNodeND(0, var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - __visc__bindIn(var_9, 32, 2, 0); - __visc__bindIn(var_9, 33, 3, 0); - - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - __visc__bindIn(var_10, 34, 2, 0); - __visc__bindIn(var_10, 35, 3, 0); - __visc__bindIn(var_10, 36, 4, 0); - __visc__bindIn(var_10, 37, 5, 0); - __visc__bindIn(var_10, 38, 6, 0); - __visc__bindIn(var_10, 39, 7, 0); - __visc__bindIn(var_10, 40, 8, 0); - __visc__bindIn(var_10, 41, 9, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - __visc__bindIn(var_12, 42, 2, 0); - __visc__bindIn(var_12, 43, 3, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - __visc__bindIn(var_13, 44, 2, 0); - __visc__bindIn(var_13, 45, 3, 0); - __visc__bindIn(var_13, 46, 4, 0); - __visc__bindIn(var_13, 47, 5, 0); - __visc__bindIn(var_13, 48, 6, 0); - __visc__bindIn(var_13, 49, 7, 0); - __visc__bindIn(var_13, 50, 8, 0); - __visc__bindIn(var_13, 51, 9, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 52, 2, 0); - 
__visc__bindIn(var_15, 53, 3, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - __visc__bindIn(var_16, 54, 2, 0); - __visc__bindIn(var_16, 55, 3, 0); - __visc__bindIn(var_16, 56, 4, 0); - __visc__bindIn(var_16, 57, 5, 0); - __visc__bindIn(var_16, 58, 6, 0); - __visc__bindIn(var_16, 59, 7, 0); - __visc__bindIn(var_16, 60, 8, 0); - __visc__bindIn(var_16, 61, 9, 0); - - void* var_17 = __visc__createNodeND(0, var_17_node); - - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); - - void* var_18 = __visc__createNodeND(0, var_18_node); - - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 62, 2, 0); - __visc__bindIn(var_18, 63, 3, 0); - - void* var_19 = __visc__createNodeND(0, var_19_node); - - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - __visc__bindIn(var_19, 64, 2, 0); - __visc__bindIn(var_19, 65, 3, 0); - __visc__bindIn(var_19, 66, 4, 0); - __visc__bindIn(var_19, 67, 5, 0); - __visc__bindIn(var_19, 68, 6, 0); - __visc__bindIn(var_19, 69, 7, 0); - __visc__bindIn(var_19, 70, 8, 0); - __visc__bindIn(var_19, 71, 9, 0); - - void* var_20 = __visc__createNodeND(0, var_20_node); - - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); - - void* var_21 = __visc__createNodeND(0, var_21_node); - - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 72, 2, 0); - __visc__bindIn(var_21, 73, 3, 0); - - void* var_22 = __visc__createNodeND(0, var_22_node); - - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - __visc__bindIn(var_22, 74, 2, 0); - __visc__bindIn(var_22, 75, 3, 0); - __visc__bindIn(var_22, 76, 4, 0); - __visc__bindIn(var_22, 77, 5, 0); - __visc__bindIn(var_22, 78, 6, 0); - __visc__bindIn(var_22, 79, 7, 0); 
- __visc__bindIn(var_22, 80, 8, 0); - __visc__bindIn(var_22, 81, 9, 0); - - void* var_23 = __visc__createNodeND(0, var_23_node); - - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); - - void* var_24 = __visc__createNodeND(0, var_24_node); - - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - __visc__bindIn(var_24, 82, 2, 0); - __visc__bindIn(var_24, 83, 3, 0); - - void* var_25 = __visc__createNodeND(0, var_25_node); - - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 84, 2, 0); - __visc__bindIn(var_25, 85, 3, 0); - __visc__bindIn(var_25, 86, 4, 0); - __visc__bindIn(var_25, 87, 5, 0); - __visc__bindIn(var_25, 88, 6, 0); - __visc__bindIn(var_25, 89, 7, 0); - __visc__bindIn(var_25, 90, 8, 0); - __visc__bindIn(var_25, 91, 9, 0); - - void* var_26 = __visc__createNodeND(0, var_26_node); - - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); - - void* var_27 = __visc__createNodeND(0, var_27_node); - - __visc__edge(var_26, var_27, 1, 0, 0, 0); - __visc__edge(var_26, var_27, 1, 1, 1, 0); - __visc__bindIn(var_27, 92, 2, 0); - __visc__bindIn(var_27, 93, 3, 0); - - void* var_28 = __visc__createNodeND(0, var_28_node); - - __visc__edge(var_27, var_28, 1, 0, 0, 0); - __visc__edge(var_27, var_28, 1, 1, 1, 0); - __visc__bindIn(var_28, 94, 2, 0); - __visc__bindIn(var_28, 95, 3, 0); - __visc__bindIn(var_28, 96, 4, 0); - __visc__bindIn(var_28, 97, 5, 0); - __visc__bindIn(var_28, 98, 6, 0); - __visc__bindIn(var_28, 99, 7, 0); - __visc__bindIn(var_28, 100, 8, 0); - __visc__bindIn(var_28, 101, 9, 0); - - void* var_29 = __visc__createNodeND(0, var_29_node); - - __visc__edge(var_28, var_29, 1, 0, 0, 0); - __visc__edge(var_28, var_29, 1, 1, 1, 0); - - void* var_30 = __visc__createNodeND(0, var_30_node); - - __visc__edge(var_29, var_30, 1, 0, 0, 0); - __visc__edge(var_29, var_30, 1, 1, 1, 0); - __visc__bindIn(var_30, 102, 
2, 0); - __visc__bindIn(var_30, 103, 3, 0); - - void* var_31 = __visc__createNodeND(0, var_31_node); - - __visc__edge(var_30, var_31, 1, 0, 0, 0); - __visc__edge(var_30, var_31, 1, 1, 1, 0); - __visc__bindIn(var_31, 104, 2, 0); - __visc__bindIn(var_31, 105, 3, 0); - __visc__bindIn(var_31, 106, 4, 0); - __visc__bindIn(var_31, 107, 5, 0); - __visc__bindIn(var_31, 108, 6, 0); - __visc__bindIn(var_31, 109, 7, 0); - __visc__bindIn(var_31, 110, 8, 0); - __visc__bindIn(var_31, 111, 9, 0); - - void* var_32 = __visc__createNodeND(0, var_32_node); - - __visc__edge(var_31, var_32, 1, 0, 0, 0); - __visc__edge(var_31, var_32, 1, 1, 1, 0); - - void* var_33 = __visc__createNodeND(0, var_33_node); - - __visc__edge(var_32, var_33, 1, 0, 0, 0); - __visc__edge(var_32, var_33, 1, 1, 1, 0); - __visc__bindIn(var_33, 112, 2, 0); - __visc__bindIn(var_33, 113, 3, 0); - - void* var_34 = __visc__createNodeND(0, var_34_node); - - __visc__edge(var_33, var_34, 1, 0, 0, 0); - __visc__edge(var_33, var_34, 1, 1, 1, 0); - __visc__bindIn(var_34, 114, 2, 0); - __visc__bindIn(var_34, 115, 3, 0); - __visc__bindIn(var_34, 116, 4, 0); - __visc__bindIn(var_34, 117, 5, 0); - __visc__bindIn(var_34, 118, 6, 0); - __visc__bindIn(var_34, 119, 7, 0); - __visc__bindIn(var_34, 120, 8, 0); - __visc__bindIn(var_34, 121, 9, 0); - - void* var_35 = __visc__createNodeND(0, var_35_node); - - __visc__edge(var_34, var_35, 1, 0, 0, 0); - __visc__edge(var_34, var_35, 1, 1, 1, 0); - - void* var_36 = __visc__createNodeND(0, var_36_node); - - __visc__edge(var_35, var_36, 1, 0, 0, 0); - __visc__edge(var_35, var_36, 1, 1, 1, 0); - __visc__bindIn(var_36, 122, 2, 0); - __visc__bindIn(var_36, 123, 3, 0); - - void* var_37 = __visc__createNodeND(0, var_37_node); - - __visc__edge(var_36, var_37, 1, 0, 0, 0); - __visc__edge(var_36, var_37, 1, 1, 1, 0); - __visc__bindIn(var_37, 124, 2, 0); - __visc__bindIn(var_37, 125, 3, 0); - __visc__bindIn(var_37, 126, 4, 0); - __visc__bindIn(var_37, 127, 5, 0); - __visc__bindIn(var_37, 128, 6, 0); - 
__visc__bindIn(var_37, 129, 7, 0); - __visc__bindIn(var_37, 130, 8, 0); - __visc__bindIn(var_37, 131, 9, 0); - - void* var_38 = __visc__createNodeND(0, var_38_node); - - __visc__edge(var_37, var_38, 1, 0, 0, 0); - __visc__edge(var_37, var_38, 1, 1, 1, 0); - - void* var_39 = __visc__createNodeND(0, var_39_node); - - __visc__edge(var_38, var_39, 1, 0, 0, 0); - __visc__edge(var_38, var_39, 1, 1, 1, 0); - - void* var_40 = __visc__createNodeND(0, var_40_node); - - __visc__edge(var_39, var_40, 1, 0, 0, 0); - __visc__edge(var_39, var_40, 1, 1, 1, 0); - __visc__bindIn(var_40, 132, 2, 0); - __visc__bindIn(var_40, 133, 3, 0); - - void* var_41 = __visc__createNodeND(0, var_41_node); - - __visc__edge(var_40, var_41, 1, 0, 0, 0); - __visc__edge(var_40, var_41, 1, 1, 1, 0); - __visc__bindIn(var_41, 134, 2, 0); - __visc__bindIn(var_41, 135, 3, 0); - - void* var_42 = __visc__createNodeND(0, var_42_node); - - __visc__edge(var_41, var_42, 1, 0, 0, 0); - __visc__edge(var_41, var_42, 1, 1, 1, 0); - - __visc__bindOut(var_42, 0, 0, 0); - __visc__bindOut(var_42, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* batch_normalization_1_gamma; - size_t batch_normalization_1_gamma_bytes; - void* batch_normalization_1_beta; - size_t batch_normalization_1_beta_bytes; - void* batch_normalization_1_mean; - size_t batch_normalization_1_mean_bytes; - void* batch_normalization_1_variance; - size_t batch_normalization_1_variance_bytes; - void* depthwise_conv2d_1_w; - size_t depthwise_conv2d_1_w_bytes; - void* batch_normalization_2_gamma; - size_t batch_normalization_2_gamma_bytes; - void* batch_normalization_2_beta; - size_t batch_normalization_2_beta_bytes; - void* batch_normalization_2_mean; - size_t batch_normalization_2_mean_bytes; - void* batch_normalization_2_variance; - size_t batch_normalization_2_variance_bytes; - void* conv2d_2_w; - 
size_t conv2d_2_w_bytes; - void* batch_normalization_3_gamma; - size_t batch_normalization_3_gamma_bytes; - void* batch_normalization_3_beta; - size_t batch_normalization_3_beta_bytes; - void* batch_normalization_3_mean; - size_t batch_normalization_3_mean_bytes; - void* batch_normalization_3_variance; - size_t batch_normalization_3_variance_bytes; - void* depthwise_conv2d_2_w; - size_t depthwise_conv2d_2_w_bytes; - void* batch_normalization_4_gamma; - size_t batch_normalization_4_gamma_bytes; - void* batch_normalization_4_beta; - size_t batch_normalization_4_beta_bytes; - void* batch_normalization_4_mean; - size_t batch_normalization_4_mean_bytes; - void* batch_normalization_4_variance; - size_t batch_normalization_4_variance_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* batch_normalization_5_gamma; - size_t batch_normalization_5_gamma_bytes; - void* batch_normalization_5_beta; - size_t batch_normalization_5_beta_bytes; - void* batch_normalization_5_mean; - size_t batch_normalization_5_mean_bytes; - void* batch_normalization_5_variance; - size_t batch_normalization_5_variance_bytes; - void* depthwise_conv2d_3_w; - size_t depthwise_conv2d_3_w_bytes; - void* batch_normalization_6_gamma; - size_t batch_normalization_6_gamma_bytes; - void* batch_normalization_6_beta; - size_t batch_normalization_6_beta_bytes; - void* batch_normalization_6_mean; - size_t batch_normalization_6_mean_bytes; - void* batch_normalization_6_variance; - size_t batch_normalization_6_variance_bytes; - void* conv2d_4_w; - size_t conv2d_4_w_bytes; - void* batch_normalization_7_gamma; - size_t batch_normalization_7_gamma_bytes; - void* batch_normalization_7_beta; - size_t batch_normalization_7_beta_bytes; - void* batch_normalization_7_mean; - size_t batch_normalization_7_mean_bytes; - void* batch_normalization_7_variance; - size_t batch_normalization_7_variance_bytes; - void* depthwise_conv2d_4_w; - size_t depthwise_conv2d_4_w_bytes; - void* batch_normalization_8_gamma; - size_t 
batch_normalization_8_gamma_bytes; - void* batch_normalization_8_beta; - size_t batch_normalization_8_beta_bytes; - void* batch_normalization_8_mean; - size_t batch_normalization_8_mean_bytes; - void* batch_normalization_8_variance; - size_t batch_normalization_8_variance_bytes; - void* conv2d_5_w; - size_t conv2d_5_w_bytes; - void* batch_normalization_9_gamma; - size_t batch_normalization_9_gamma_bytes; - void* batch_normalization_9_beta; - size_t batch_normalization_9_beta_bytes; - void* batch_normalization_9_mean; - size_t batch_normalization_9_mean_bytes; - void* batch_normalization_9_variance; - size_t batch_normalization_9_variance_bytes; - void* depthwise_conv2d_5_w; - size_t depthwise_conv2d_5_w_bytes; - void* batch_normalization_10_gamma; - size_t batch_normalization_10_gamma_bytes; - void* batch_normalization_10_beta; - size_t batch_normalization_10_beta_bytes; - void* batch_normalization_10_mean; - size_t batch_normalization_10_mean_bytes; - void* batch_normalization_10_variance; - size_t batch_normalization_10_variance_bytes; - void* conv2d_6_w; - size_t conv2d_6_w_bytes; - void* batch_normalization_11_gamma; - size_t batch_normalization_11_gamma_bytes; - void* batch_normalization_11_beta; - size_t batch_normalization_11_beta_bytes; - void* batch_normalization_11_mean; - size_t batch_normalization_11_mean_bytes; - void* batch_normalization_11_variance; - size_t batch_normalization_11_variance_bytes; - void* depthwise_conv2d_6_w; - size_t depthwise_conv2d_6_w_bytes; - void* batch_normalization_12_gamma; - size_t batch_normalization_12_gamma_bytes; - void* batch_normalization_12_beta; - size_t batch_normalization_12_beta_bytes; - void* batch_normalization_12_mean; - size_t batch_normalization_12_mean_bytes; - void* batch_normalization_12_variance; - size_t batch_normalization_12_variance_bytes; - void* conv2d_7_w; - size_t conv2d_7_w_bytes; - void* batch_normalization_13_gamma; - size_t batch_normalization_13_gamma_bytes; - void* 
batch_normalization_13_beta; - size_t batch_normalization_13_beta_bytes; - void* batch_normalization_13_mean; - size_t batch_normalization_13_mean_bytes; - void* batch_normalization_13_variance; - size_t batch_normalization_13_variance_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - -std::string dir_prefix = std::string("data/mobilenet_shallow_nathan/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = 
readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string 
batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + 
std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = 
readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string 
batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + 
std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* 
batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -uint8_t* labels = readLabels(labels_path.c_str(),10000); - -__visc__init(); -RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - -args->input = input; -args->input_bytes = 0; -args->conv2d_1_w = conv2d_1_w; -args->conv2d_1_w_bytes = 0; -args->batch_normalization_1_gamma = batch_normalization_1_gamma; -args->batch_normalization_1_gamma_bytes = 0; -args->batch_normalization_1_beta = batch_normalization_1_beta; -args->batch_normalization_1_beta_bytes = 0; -args->batch_normalization_1_mean = batch_normalization_1_mean; -args->batch_normalization_1_mean_bytes = 0; -args->batch_normalization_1_variance = batch_normalization_1_variance; -args->batch_normalization_1_variance_bytes = 0; -args->depthwise_conv2d_1_w = depthwise_conv2d_1_w; -args->depthwise_conv2d_1_w_bytes = 0; -args->batch_normalization_2_gamma = batch_normalization_2_gamma; -args->batch_normalization_2_gamma_bytes = 0; -args->batch_normalization_2_beta = batch_normalization_2_beta; -args->batch_normalization_2_beta_bytes = 0; -args->batch_normalization_2_mean = 
batch_normalization_2_mean; -args->batch_normalization_2_mean_bytes = 0; -args->batch_normalization_2_variance = batch_normalization_2_variance; -args->batch_normalization_2_variance_bytes = 0; -args->conv2d_2_w = conv2d_2_w; -args->conv2d_2_w_bytes = 0; -args->batch_normalization_3_gamma = batch_normalization_3_gamma; -args->batch_normalization_3_gamma_bytes = 0; -args->batch_normalization_3_beta = batch_normalization_3_beta; -args->batch_normalization_3_beta_bytes = 0; -args->batch_normalization_3_mean = batch_normalization_3_mean; -args->batch_normalization_3_mean_bytes = 0; -args->batch_normalization_3_variance = batch_normalization_3_variance; -args->batch_normalization_3_variance_bytes = 0; -args->depthwise_conv2d_2_w = depthwise_conv2d_2_w; -args->depthwise_conv2d_2_w_bytes = 0; -args->batch_normalization_4_gamma = batch_normalization_4_gamma; -args->batch_normalization_4_gamma_bytes = 0; -args->batch_normalization_4_beta = batch_normalization_4_beta; -args->batch_normalization_4_beta_bytes = 0; -args->batch_normalization_4_mean = batch_normalization_4_mean; -args->batch_normalization_4_mean_bytes = 0; -args->batch_normalization_4_variance = batch_normalization_4_variance; -args->batch_normalization_4_variance_bytes = 0; -args->conv2d_3_w = conv2d_3_w; -args->conv2d_3_w_bytes = 0; -args->batch_normalization_5_gamma = batch_normalization_5_gamma; -args->batch_normalization_5_gamma_bytes = 0; -args->batch_normalization_5_beta = batch_normalization_5_beta; -args->batch_normalization_5_beta_bytes = 0; -args->batch_normalization_5_mean = batch_normalization_5_mean; -args->batch_normalization_5_mean_bytes = 0; -args->batch_normalization_5_variance = batch_normalization_5_variance; -args->batch_normalization_5_variance_bytes = 0; -args->depthwise_conv2d_3_w = depthwise_conv2d_3_w; -args->depthwise_conv2d_3_w_bytes = 0; -args->batch_normalization_6_gamma = batch_normalization_6_gamma; -args->batch_normalization_6_gamma_bytes = 0; -args->batch_normalization_6_beta = 
batch_normalization_6_beta; -args->batch_normalization_6_beta_bytes = 0; -args->batch_normalization_6_mean = batch_normalization_6_mean; -args->batch_normalization_6_mean_bytes = 0; -args->batch_normalization_6_variance = batch_normalization_6_variance; -args->batch_normalization_6_variance_bytes = 0; -args->conv2d_4_w = conv2d_4_w; -args->conv2d_4_w_bytes = 0; -args->batch_normalization_7_gamma = batch_normalization_7_gamma; -args->batch_normalization_7_gamma_bytes = 0; -args->batch_normalization_7_beta = batch_normalization_7_beta; -args->batch_normalization_7_beta_bytes = 0; -args->batch_normalization_7_mean = batch_normalization_7_mean; -args->batch_normalization_7_mean_bytes = 0; -args->batch_normalization_7_variance = batch_normalization_7_variance; -args->batch_normalization_7_variance_bytes = 0; -args->depthwise_conv2d_4_w = depthwise_conv2d_4_w; -args->depthwise_conv2d_4_w_bytes = 0; -args->batch_normalization_8_gamma = batch_normalization_8_gamma; -args->batch_normalization_8_gamma_bytes = 0; -args->batch_normalization_8_beta = batch_normalization_8_beta; -args->batch_normalization_8_beta_bytes = 0; -args->batch_normalization_8_mean = batch_normalization_8_mean; -args->batch_normalization_8_mean_bytes = 0; -args->batch_normalization_8_variance = batch_normalization_8_variance; -args->batch_normalization_8_variance_bytes = 0; -args->conv2d_5_w = conv2d_5_w; -args->conv2d_5_w_bytes = 0; -args->batch_normalization_9_gamma = batch_normalization_9_gamma; -args->batch_normalization_9_gamma_bytes = 0; -args->batch_normalization_9_beta = batch_normalization_9_beta; -args->batch_normalization_9_beta_bytes = 0; -args->batch_normalization_9_mean = batch_normalization_9_mean; -args->batch_normalization_9_mean_bytes = 0; -args->batch_normalization_9_variance = batch_normalization_9_variance; -args->batch_normalization_9_variance_bytes = 0; -args->depthwise_conv2d_5_w = depthwise_conv2d_5_w; -args->depthwise_conv2d_5_w_bytes = 0; -args->batch_normalization_10_gamma = 
batch_normalization_10_gamma; -args->batch_normalization_10_gamma_bytes = 0; -args->batch_normalization_10_beta = batch_normalization_10_beta; -args->batch_normalization_10_beta_bytes = 0; -args->batch_normalization_10_mean = batch_normalization_10_mean; -args->batch_normalization_10_mean_bytes = 0; -args->batch_normalization_10_variance = batch_normalization_10_variance; -args->batch_normalization_10_variance_bytes = 0; -args->conv2d_6_w = conv2d_6_w; -args->conv2d_6_w_bytes = 0; -args->batch_normalization_11_gamma = batch_normalization_11_gamma; -args->batch_normalization_11_gamma_bytes = 0; -args->batch_normalization_11_beta = batch_normalization_11_beta; -args->batch_normalization_11_beta_bytes = 0; -args->batch_normalization_11_mean = batch_normalization_11_mean; -args->batch_normalization_11_mean_bytes = 0; -args->batch_normalization_11_variance = batch_normalization_11_variance; -args->batch_normalization_11_variance_bytes = 0; -args->depthwise_conv2d_6_w = depthwise_conv2d_6_w; -args->depthwise_conv2d_6_w_bytes = 0; -args->batch_normalization_12_gamma = batch_normalization_12_gamma; -args->batch_normalization_12_gamma_bytes = 0; -args->batch_normalization_12_beta = batch_normalization_12_beta; -args->batch_normalization_12_beta_bytes = 0; -args->batch_normalization_12_mean = batch_normalization_12_mean; -args->batch_normalization_12_mean_bytes = 0; -args->batch_normalization_12_variance = batch_normalization_12_variance; -args->batch_normalization_12_variance_bytes = 0; -args->conv2d_7_w = conv2d_7_w; -args->conv2d_7_w_bytes = 0; -args->batch_normalization_13_gamma = batch_normalization_13_gamma; -args->batch_normalization_13_gamma_bytes = 0; -args->batch_normalization_13_beta = batch_normalization_13_beta; -args->batch_normalization_13_beta_bytes = 0; -args->batch_normalization_13_mean = batch_normalization_13_mean; -args->batch_normalization_13_mean_bytes = 0; -args->batch_normalization_13_variance = batch_normalization_13_variance; 
-args->batch_normalization_13_variance_bytes = 0; -args->dense_1_w = dense_1_w; -args->dense_1_w_bytes = 0; -args->dense_1_b = dense_1_b; -args->dense_1_b_bytes = 0; - -void* dfg = __visc__launch(0, root, (void*) args); - -__visc__wait(dfg); - -void *result = static_cast<RootIn*>(args)->input; -hpvm_request_tensor(result, 0); - -__visc__cleanup(); - computeAccuracy2(labels, 10000, result); -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_beta.bin deleted file mode 100644 index 5d9a0d95865637cfb783fb9a56d3ff2ecb57e868..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_gamma.bin deleted file mode 100644 index 71147ba51b53f9b5f8ed84d3e12b3f60d04e88f0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_mean.bin deleted file mode 100644 index f75ef27a6bde8cf45607b0e7957603ad5c767928..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_variance.bin deleted file mode 100644 index 
cdbb02d6dcc67a983c949224e5ef2356cbed70ec..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_10_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_beta.bin deleted file mode 100644 index a6d770acd50df688be127899d5ebc76a6b660108..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_gamma.bin deleted file mode 100644 index 7d2add83b878940a6e83ff33ac8328b08218b036..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_mean.bin deleted file mode 100644 index 481fa2d212a171377d79b38765b42481939abd0f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_variance.bin deleted file mode 100644 index 99b00e0a82730dbf49cc6112379b6106b3538f24..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_11_variance.bin and /dev/null differ diff 
--git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_beta.bin deleted file mode 100644 index e2fa099a1b5df7840c7b5b2c8b9ec83bad07f238..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_gamma.bin deleted file mode 100644 index 2c6d46a8c35a83ea5929e7b0b06980baf1ea8b08..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_mean.bin deleted file mode 100644 index 4c46529e2774bb4fed9337394213ddfd6fa3b7a4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_variance.bin deleted file mode 100644 index 8afde358ed8dffed9eca531e3ced41953036c926..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_12_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_beta.bin deleted file mode 100644 
index 5192e8414e7349eb49f139c31d688349dfcaa915..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_gamma.bin deleted file mode 100644 index c7ba0b707e96c024bbdcf825a28f78522685b7e2..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_mean.bin deleted file mode 100644 index 41c23352862bc90c6cb298fbda821712c919673b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_variance.bin deleted file mode 100644 index 67aa92699f5da3e6384e2502fce4cf985d207e2c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_13_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_beta.bin deleted file mode 100644 index 05d61c8e00f196b83dde7de794cc9feff2929582..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_beta.bin +++ /dev/null @@ -1,2 +0,0 @@ 
-êâ®>@Hx>„jb¾Çå>:Ù*>YR¡>nÍ>ù}u?ýæ>¤ÌB?|¾bç&?ÇO?e=”?¿‹¸¾f'e¾8ƒ¸½è‹Î;Éï:?Ó -‹?ˆþ„>½»>ŸÞ>î‚?˜5A?|6žàÒ>²•Ý¼Þ>?Ä1?nŸ¾u‡‚> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_gamma.bin deleted file mode 100644 index 1aaaeaa110d8b9eb8108a1546302f8d5c1c12c35..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_gamma.bin +++ /dev/null @@ -1,2 +0,0 @@ -³]ƒ?Ç/?®0€?]…o?ƒ?ߌ?Vň?`Z?Gk„?±*`?žÐƒ?pÈ~? *?+g ?4 -u?Òü?Qƒ?|?š O??ä?K{?^5Œ?ÃÜ‚?£?…ØA?žTr?1y?÷€?!56?&ñƒ?HWv?§^‚? \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_mean.bin deleted file mode 100644 index ffcd3adfeac9b601872fa59caa42601fdc10494c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_mean.bin +++ /dev/null @@ -1 +0,0 @@ -ˆG¢¹$=ã®Ý;9Oü¼ø-'<p;K< à»Þâ€;žcé¼K!¼“¿œ<õön¼M9 ;¬…;’½î3ì<¯Á;¨`…<|¾¼þæ:¼;ÿ%=÷»L¶Z»)z·»ªfºS:Œ<j*>»¾M<¨u» û;%;þ;ñ&J: \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_variance.bin deleted file mode 100644 index f29dc5a9db7e4fe9783917749bd151ce80e40702..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_1_variance.bin +++ /dev/null @@ -1 +0,0 @@ -ÃÅn@éA²£š?ä+@"@9ÛÞ@áÀ(@•¹÷>Ò¢Á@ƒ'E@)¡@øˆ+@œZž>«Ç?A?¤A˜x°A0ªª?®¯#AÿΕ@«Vì>~ÑÅAg«“@VúAÿ>;j@š”@j¯ø?.AB¾>œê;@ø#û?Q ~@ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_beta.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_beta.bin deleted file mode 100644 index ba12532332cec1d6ee20d16d04be81575a8f0802..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_beta.bin +++ /dev/null @@ -1 +0,0 @@ -N?¶k©¾¬r>{_?kÙy¾R?fÀ?ä%Q?“k¾åœ?—õ^½go?=9L>A†?ím½Ôm ¿†Ç½R?²¾íO‡?àhv?ìt4¾ÙN?cá~?i«Ÿ?¹[?•ï¾<M_>Êõö¾>ðn½rÞ¾? Ž?¼êªE= \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_gamma.bin deleted file mode 100644 index bf0dd075d19280dfcda711fd95eeab6fb429b8f5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_gamma.bin +++ /dev/null @@ -1 +0,0 @@ -L¾ö>üiQ?-)]?!0?Lw_?`Ë…?ö%4?sÚ¢?²È7?œé`?Ò¿b?¬Y?hv?MC @\ÞY?ñ8P?”Ü\?»QI?ë8?ʤ‘?Ëîl?`¥&?,S?&›?wxr?Ãl_?@8l?k<,?6Ä…?Y={?YjC?2L>? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_mean.bin deleted file mode 100644 index faec424f63fab99e4ba00101a005c1b84cb2f8f3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_mean.bin +++ /dev/null @@ -1,2 +0,0 @@ -’D´¿hJ¿úVN¿ŠÊ>?Ú%8¿¶œf?U_C¾]º¤½†Ã?çv=Ü"¾Ï6¾g½&¾òß[>`"¿0Õ8>•¿Fô¦¾i³?Z?=鯿‰\TÀ 5[?&Ñ6>Ÿÿ.¿ýkn¾Ú4•>p%5¿Î(>"×>Ù -¿p²M> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_variance.bin deleted file mode 100644 index 80125312bc29cc27bcb9a51db2d206c70b19c25d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_2_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_beta.bin deleted file mode 100644 index 0a7e5127f93e4f3e77893c02209dd34f92fcde00..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_beta.bin +++ /dev/null @@ -1,2 +0,0 @@ -ùEͽý¨½õž9¾pÊ>Ûàÿ½›³>Ÿÿ²>£f>’ë¾Ó‹Ý¹"å>¼½–¾ -ð~¾½¼¾Üöý==Á‹=ä!>D>°©>’çn>£ç;(+Ö=Ÿ‡?Üÿl?m¾$0`>¥¡<¸Ïy?Vál?‚?(Ò'½Y?o>uƒ@>€q>…ö>ë}î>fÓW?K>¿» ?)?F¿¾Ÿ¯Y¾Ý¯?·°—>YdL¾–ZC?þÕ÷=?pUV?þV]>›Â©¾å=9{/>É’Ž½«U>¸ŠÖ½*Ï&>1Í;Mj1¾B‡A=™¾ü½æ…)?!3? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_gamma.bin deleted file mode 100644 index ab4be7e7af315799ddc2f371e09442d81c81ec9e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_mean.bin deleted file mode 100644 index 5c4cccbc2d7756430aba85f100d164425b7b7559..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_variance.bin deleted file mode 100644 index 88e0320d9764ac47a0ffeccd912430db4e3a70ad..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_3_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_beta.bin deleted file mode 100644 index 78b0f312269445116d4b9e05d3f2f85730509d46..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_gamma.bin 
deleted file mode 100644 index cc9ac2a0fcc9dc57b61c54d13f9cdaba8bf045c9..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_mean.bin deleted file mode 100644 index e184ea4954ffe0e8070fd467bc90093c142ee754..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_variance.bin deleted file mode 100644 index dd6c0672454934523c04c2e124bb64d024c2207f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_4_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_beta.bin deleted file mode 100644 index d111c363bdab8b36db98fcefcd2eb61e080eadd4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_gamma.bin deleted file mode 100644 index aae71935a9ec2124e203c921e2d2ca570f3aa2a8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_gamma.bin and 
/dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_mean.bin deleted file mode 100644 index b4675bad00eddb39999c5411eb225f9d13a22fc4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_variance.bin deleted file mode 100644 index f8126c266f398a9013241ee5d97fe42beaa5bb37..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_5_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_beta.bin deleted file mode 100644 index c18a950b0d0acca31f82e84135c392e348868011..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_gamma.bin deleted file mode 100644 index 92bc587a86c98aadc5549f3da65b4f74e812b2fb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_mean.bin deleted file mode 
100644 index c888f2c909ac6d95871fe944b6b4f51242d4eb8a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_variance.bin deleted file mode 100644 index a5a799857b7cc50a9aa8208aab08e7270dccca5b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_6_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_beta.bin deleted file mode 100644 index ab02be5f352315724b5ca3b59e33ff085f46207d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_gamma.bin deleted file mode 100644 index 72c58ae29db08ac94c3b9b778ea015405cb9d3f6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_mean.bin deleted file mode 100644 index 7f0e01a07c23faa2101cbf299ea9d35fe3d5e3ec..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_mean.bin and /dev/null differ diff 
--git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_variance.bin deleted file mode 100644 index 094474aca2ad49d1400c71d9acbfcd1631c7be18..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_7_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_beta.bin deleted file mode 100644 index 5f92c58a7c47c207a98a77f6961410d08e8446f0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_gamma.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_gamma.bin deleted file mode 100644 index 6ab36ce54740e9e4a4e4948684e0e4fbbd71b1cb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_gamma.bin +++ /dev/null @@ -1,3 +0,0 @@ -œ®?œË?®m‚?…4?¸µ?µ¦?ã?‡F'?nÑ#? -=?sˆ.?V>“?m¹?æÓD? SÁ?§7?Š›¼? ‡?É&?qä™?§§?Ä'?"ZN?aég?"ò*?&.)?ÕM?%??Ы?%Œ1?Å:?æ! ?”O:?ÐѨ?9½*?õ¬?w¨?ÎÁT?HÆ -?%„?ª)?‚e&?x°?‡Ë?µ«±?ÏÜ?C1?w¯?kì>? ?uD?£òˆ?²’Ÿ?µ¢G?!0?Ñãœ?eŠ2?!b$?úK¬? d?Ü6¨?è‰?2À¹?÷f?ñ?ᆱ?»g?zT@?<e,?Å@7?z‚0?W*:?)‘+?£?(?«Ø;?ùI±?·Œ?Œ ?²Í?Ä2?o¶ª?¾)? ?N³V?ßHž?Ç-?ü,-??#?HÄ?¾À}?pG?Yù#?HÁ?0è?„U?'%?u‘?£Í?O«?©é¨?ÉN›?£ED?@œ?¨À˜?v™?>²?G²?”?¶?‡ ?Y·¬?j?´ØP?^À?Xܘ?…ì?°bG?¨¨?ã§?r¨+?绤?ŸG£?¿¬E?&6%?áˆ?ô²?" £?¿®›?-4? 
\ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_mean.bin deleted file mode 100644 index 5d093813a0926dd3c1c67b6f50e092465101fde9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_mean.bin +++ /dev/null @@ -1,4 +0,0 @@ -¡‰>- ¿Â¿ZF%>Õ# -¿7æ¾*~>Šß;>€Ó3>P^P>¯ÓX>íÝ¿2 Ë=Æô”>¡o"¿©àÕ>EX$¿30 >œË©<tпh{W>LÕˆ>¨Â>KZâ>+_>;™>€g¾>ÒjÇ>ô¿C‘>í>I“#¿2‹Ï>üfÁ¾‡ø+>4rc>•—˾‘÷´>ZœÕ=V±ì=Ü^é=ƒv‘>[;û¾sˆ=ÓŒ¿Ë“>@AI>¸:¿ÿäD>c˜Z>Žè–>'Š¿t7¿¬?~>gÈ{=ê© ¿9½>Jr'>ðg¿(=d>X)ÿ¾œÉL>)I¿{>ÛfC>ÅS¿”š¥>ô„Ï>í?¹>O>=>>¥>ˆ§‡>¿]&=¡q…>‘)¿Î -¿-N>Cü=H÷>Úªø¾9>êü>+Ó>Û¿ÀDB>xÈn>²ï9>h›[>Æ•ì¾ôn–>ušp>|Zi>ûÊ—>ÕY>|‚7>îÓ‹=‹Ú÷=ðŠ´¾=—¿ µÓ¾Œ_^>Òr!¿bâ¾ÏÍÞ¾Ô>#é¿ŒrH¿¾îƒ>ñ÷¿fP>åB>â:/¿I ƾà(r>[ïa>` -¿O£¿c@à=A¯Û¾Y'¿‚°>3>P2F> ¿Ä ¿ò¿&;> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_variance.bin deleted file mode 100644 index edf6463b0ca999595327a9dc300242a9e58c1fb8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_8_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_beta.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_beta.bin deleted file mode 100644 index ad3f1dc8965ba641749d65a5d0c5b32ab40c5dd4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_beta.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_gamma.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_gamma.bin deleted file mode 100644 index ec2b90646b3c7f21565e4972638e746e71a2b5bb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_gamma.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_mean.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_mean.bin deleted file mode 100644 index 47b2393cf22e01162577be3e361a1a40caec6bb8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_mean.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_variance.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_variance.bin deleted file mode 100644 index fb0c96059789a653f0d064e2c4743287b213d90d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/batch_normalization_9_variance.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_1_w.bin deleted file mode 100644 index 3e10934df8c5194e89ced8a8c6dfc0c496d63659..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_2_w.bin deleted file mode 100644 index b156a80dbbad1956afde6c953b760fe3147f86dd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_2_w.bin and /dev/null 
differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_3_w.bin deleted file mode 100644 index 39ccf4d05b623c02ad5c86aa537804df697b2eca..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_4_w.bin deleted file mode 100644 index 19fa2c8035b9439be46392feee277b1e2c796994..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_5_w.bin deleted file mode 100644 index 79d3b1efe6c1d18ce86fea69602f161425c76421..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_6_w.bin deleted file mode 100644 index fc7d758888153e7a52ebb59e8db7822d5ca58283..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_7_w.bin deleted file mode 100644 index d569ea19a45477b991af7bce4aa14289bb3858a4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/conv2d_7_w.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_b.bin deleted file mode 100644 index dde75645d79ba2039e975a4cb2892f2cdca58038..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_b.bin +++ /dev/null @@ -1 +0,0 @@ -êÞ^>ÂX`¾q=Ï·‡>Hp‚>°¾¾B—b>6ÁU¾$ƒt¾½M¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_w.bin deleted file mode 100644 index e053b5d9d9ca19466225106fd9ad109d55e32cdb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/dense_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_1_w.bin deleted file mode 100644 index b0948ad7c455ab26b7a500823da78dd2ebdf5a2f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_2_w.bin deleted file mode 100644 index 673879938fec8d6cea506ceba413479fe5305a72..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_3_w.bin deleted file mode 100644 index 19e9c200ad108dcafbdac74c614b3fe637a76e0b..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_4_w.bin deleted file mode 100644 index 036b5573250744da275f27bca679c5eea90f8d67..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_5_w.bin deleted file mode 100644 index 870049e69e3783cf45939876c6b8717033d6cce7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_6_w.bin deleted file mode 100644 index f23ffe4c99eaac8f9f6d96d48f7312e25347f86f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/depthwise_conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/input.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/input.bin deleted file mode 100644 index 793e873758141ad74020bcb21cfe55fda29be851..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels.bin deleted file mode 100644 index 
af228a267c6c651a76b7d719f8d44202ed4c0eae..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels32.bin deleted file mode 100644 index 24b800b7002207fa05a7976e08c05cf5f72aa91d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layer_composition.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layer_composition.txt deleted file mode 100644 index 9b8b3f7e11a428a28fecbde2c204bf39b7e02703..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layer_composition.txt +++ /dev/null @@ -1,41 +0,0 @@ -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -depthwise_conv -batchnorm -activation -conv -batchnorm -activation -pool -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layers.txt b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layers.txt deleted file mode 100644 index a9415755180a7ebdceb89b7e3e6d6cee258b18c4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/layers.txt +++ /dev/null @@ -1,41 +0,0 @@ -Conv1,10000,3,32,32,32,3,3,3 -#tensorBatchNorm1 -#tensorRelu1 -#tensorDepthwiseConv1 -#tensorBatchNorm2 -#tensorRelu2 -Conv2,10000,32,32,32,64,32,1,1 -#tensorBatchNorm3 
-#tensorRelu3 -#tensorDepthwiseConv2 -#tensorBatchNorm4 -#tensorRelu4 -Conv3,10000,64,16,16,128,64,1,1 -#tensorBatchNorm5 -#tensorRelu5 -#tensorDepthwiseConv3 -#tensorBatchNorm6 -#tensorRelu6 -Conv4,10000,128,16,16,128,128,1,1 -#tensorBatchNorm7 -#tensorRelu7 -#tensorDepthwiseConv4 -#tensorBatchNorm8 -#tensorRelu8 -Conv5,10000,128,8,8,256,128,1,1 -#tensorBatchNorm9 -#tensorRelu9 -#tensorDepthwiseConv5 -#tensorBatchNorm10 -#tensorRelu10 -Conv6,10000,256,8,8,256,256,1,1 -#tensorBatchNorm11 -#tensorRelu11 -#tensorDepthwiseConv6 -#tensorBatchNorm12 -#tensorRelu12 -Conv7,10000,256,4,4,512,256,1,1 -#tensorBatchNorm13 -#tensorRelu13 -#tensorPooling1 -FC1,10000,2048,2048,10 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/promise_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/promise_src.cc deleted file mode 100644 index c5fd3606da51281bc2c583e98f024bd2f54f837b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/promise_src.cc +++ /dev/null @@ -1,238 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - -int total_runs = 100; -for (int i = 0 ; i < total_runs; i++){ - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - - - -std::string dir_prefix = std::string("data/mobilenet_shallow_nathan/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = 
dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* 
batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + 
std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = 
readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 
0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = 
readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.5164621164798737, 1.6472081774473288, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -9.868980642318725, 10.560956018447879, 9); -void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); -void* var_2 = tensorRelu(var_1); -void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); -void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); -void* var_5 = tensorRelu(var_4); -void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.821381127357554, conv2d_2_w, -1.1834390873908995, 1.2731596627235617, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -9.875998497009277, 7.51305247974393, 9); -void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); -void* var_8 = tensorRelu(var_7); -void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); -void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); -void* var_11 = tensorRelu(var_10); -void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.826067455768602, conv2d_3_w, -0.599876856982708, 0.6812073457241064, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.633289833068848, 5.177892235755925, 9); -void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorConvolution(var_14, 
depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); -void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); -void* var_17 = tensorRelu(var_16); -void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.02646304416659, conv2d_4_w, -0.4555967862010002, 0.4942613914608956, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.316803941726685, 4.605850250244146, 9); -void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); -void* var_20 = tensorRelu(var_19); -void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); -void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); -void* var_23 = tensorRelu(var_22); -void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 4.532649063110355, conv2d_5_w, -0.35657615590095515, 0.3382165088057521, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.1012511816024775, 4.3630500688553, 9); -void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); -void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); -void* var_29 = tensorRelu(var_28); -void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.9874704387188977, conv2d_6_w, -0.28502783328294756, 0.28604640334844594, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.243851703643799, 3.486250406742097, 9); -void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); -void* var_32 = 
tensorRelu(var_31); -void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); -void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); -void* var_35 = tensorRelu(var_34); -void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 6.563065901756522, conv2d_7_w, -0.18946402323246003, 0.19012390717864017, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.938115713119507, 3.538363476753238, 9); -void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,1,2,2,0,0,2,2); -void* var_40 = FCLayer_PROMISE(var_39, 0.0, 1.8908388000727185, dense_1_w, -0.35140394401550296, 0.422872786462307, dense_1_b, -0.23878151, 0.26507422, -1, -14.630816223144532, 27.27252123260504, 9); -void* var_41 = tensorSoftmax(var_40); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_41); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -} - -dumpExecutionAccuracies(); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/src.cc deleted file mode 100644 index 6599f7d0ea0be6a76c4154d25b3a7be2c6724115..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/mobilenet_shallow/src.cc +++ /dev/null @@ -1,231 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - 
-llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("data/mobilenet_shallow_nathan/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + 
std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = 
readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string 
batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = 
readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = 
readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); -void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); -void* var_2 = tensorRelu(var_1); -void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); -void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); -void* var_6 = tensorRelu(var_5); -void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); -void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); -void* var_9 = tensorRelu(var_8); -void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); -void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); -void* var_13 = tensorRelu(var_12); -void* var_14 = 
tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); -void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); -void* var_16 = tensorRelu(var_15); -void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); -void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); -void* var_20 = tensorRelu(var_19); -void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); -void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); -void* var_23 = tensorRelu(var_22); -void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); -void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); -void* var_28 = tensorRelu(var_27); -void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); -void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); -void* var_31 = tensorRelu(var_30); -void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); -void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); -void* var_35 = tensorRelu(var_34); -void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); -void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); -void* var_38 = tensorRelu(var_37); -void* var_41 = tensorConvolution(var_38, 
depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); -void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); -void* var_43 = tensorRelu(var_42); -void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); -void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); -void* var_46 = tensorRelu(var_45); -void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); -void* var_49 = tensorGemmGPU(var_47, dense_1_w); -void* var_50 = tensorAdd(var_49, dense_1_b); -void* var_51 = tensorSoftmax(var_50); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_51); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_b.bin deleted file mode 100644 index dbdb7c37b64e8e16a6ea093d4d6e4f1af8258610..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_w.bin deleted file mode 100644 index 9c188f6f249781fc6aca77709f7dba4231072765..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_10_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_b.bin deleted file 
mode 100644 index 97b8ebe8bdc9d4e5ca74e6126eca1e14f5bfd3db..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_w.bin deleted file mode 100644 index 10fa1ebe08c6712f341d2d270492643fda102014..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_11_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_b.bin deleted file mode 100644 index 8e713d6176cac919a63d2ea8ed9fa47fe800db05..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_b.bin +++ /dev/null @@ -1,2 +0,0 @@ -)¯¼YÅG¾^þ™½-‚ä¾R¾Ý¾‰c¾l2&?D¾ -"q<~øb¾ÌîY¾LmÞ½hÍ$¾ÌÅg¾]db¾V&¾ÿ p¾À’`¾M·4½Õø=š×¾õ: ½Ž“½_ºN½µf¼Û]K¾à·õ½>°ù¼“(½®èྮb< \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_w.bin deleted file mode 100644 index 465c40b612851a9bd398ce5d24c0b25982db8a67..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_12_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_b.bin deleted file mode 100644 index 84fdd64bf1381f65f3ddd65913b5c0f469f49daa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_b.bin +++ /dev/null @@ -1 +0,0 @@ 
-¶‡“¼ž®Y¾$,Ö¼Y'm½"0Ž*„6¿(Ö¿äšÌ¾§þ¿¾IX/¾Û^l¼VÌú¼'P‹¾B‹[½·õa½:n¿î@¾F•¼¿áVè¼#…¿'©›¿¸{j¾~º½ÿŽ¾PXi¾ñs¹½ÿƒ›¿ÂV¿¸Žà½BD¾eò™¾Ø¥½ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_w.bin deleted file mode 100644 index 6191b0390b3ffbb7424fda7653b609022adbaa2e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_13_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_b.bin deleted file mode 100644 index e2f04a5e2ffbce542423a97f67ddc3dc8276abbd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_b.bin +++ /dev/null @@ -1,2 +0,0 @@ -Dz\¾—ý(¾n¬½ãå¾0 -¾h.¾á"Œ¾â%ˆ>†Ÿ¾BÁ½B¦¿_V¾e“ѽD„¾EàϽUIf¾Ç3N¾zì7¾7¨a¾–Ž‚¾3í¼Ã?¾Ðl¶½üß…¾Ss¡½µ Š¾ïÞ¾WŸ¾ô¾í÷³¾³Õ½ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_w.bin deleted file mode 100644 index d1b63e862f10a2e7c9db4e34db50869a6b7de295..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_14_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_b.bin deleted file mode 100644 index 81abea9153be67ff7867c2939ca6afca9e7a7c2e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_w.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_w.bin deleted file mode 100644 index 2bfe843e43ab3a638cf74ec4b5890f7402f16368..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_15_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_b.bin deleted file mode 100644 index a674bac343d6cb3b1396a01ec1c23c6406eb0687..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_b.bin +++ /dev/null @@ -1,2 +0,0 @@ -Л>g܇?5þ½ì¹¼a¢';7Ð+¼t¾¤h¿è1”>ªª¾èêl?ÅŠ>n>ý«=w2¾Uf?k&?ñã!> ã‘>ø!½ß‹½âuD=¦d¾!6Á¾Cb?ûÄ輆¤¼¸Ñ¿j½–B·=œœY½Æ+®>9ø]½Š -î<ÂQ¾ˆlO½»®9È“¾´$>³Ð ½q(?S)¾T¾Œ'>€ù…>¡^¾U2?µ”œ>Ϙ¾…]¿‹úZ½ƒNá½U½ÔY>qÅ#>›bä>B"?J˜?m+ý¾¹zɾ“½½Œ*z¼mc?Ç-x= \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_w.bin deleted file mode 100644 index c4c291ec63cf370ee1a159b7c776cb095eb419aa..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_16_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_b.bin deleted file mode 100644 index 56a0d3dba4cf3ee1098107daab9e9a5ef89a8f7e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_w.bin deleted file mode 100644 index 94460a2a9c014acf06b561d10e8e342df6416c79..0000000000000000000000000000000000000000 Binary 
files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_17_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_b.bin deleted file mode 100644 index 812f0bc63a04eee434f32e44b08c535a6fff0e1f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_w.bin deleted file mode 100644 index 22d04d201690e95c09dd39b2e62395356a91a610..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_18_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_b.bin deleted file mode 100644 index 264ebf9db7b81d0c58bf42367964001796bb7007..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_w.bin deleted file mode 100644 index 26692e859c2ff6a6f466c5280eb0a9ec13e7ba3d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_19_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_b.bin deleted file mode 100644 index d8fa20287f246359d687a7e1c8a9add5b0d482b1..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_b.bin +++ /dev/null @@ -1 +0,0 @@ -¤>>w%¾uй¾Õ>ƒ¸ã½P±>t'=|,¾šó:>Û/í=£Æ=1ôF>wÖ,½Ã/?¾á`¤» \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_w.bin deleted file mode 100644 index 2b28d11d7087f8482ca6b85614bac9c5d89e86ba..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_b.bin deleted file mode 100644 index c1e9487d5c2dbf612e1d405894b4be57ac86931a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_w.bin deleted file mode 100644 index 19bb82b16e0c852ef7de1133581570de4e5163d4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_20_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_b.bin deleted file mode 100644 index f8e4509e626f855566c430560c33501c741d82cd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_b.bin +++ /dev/null @@ -1,2 +0,0 @@ -oD??êÙ€?àkü½žHÝ>Ô61¾½<=¡¶(>Ž—¿œˆ>"W«¿¨^B½F[$¿:’ì>¾ýë¼Í -'¿]3¡>Üx!¿¿N?}Þ=oCÀ”=¾é½*¢Ÿ>í#œ¿Gk?ŠxÞ>YÌ¢¾$l¢¼)Ó¿² ¿6¸q¿ä—¿,à>?ÿѽ£ïξFlü¾%»¾ò—X¿³¶:>IZ À±@q½bõ‰½‚yŸ¾Ü&,¿÷‰µ¿‹8¿qÞD?*ƒ;¿û뽚ۗ½‰>ô'*¿pÑ¡¾—Á>Âý\>*Y5¾åsP½Êè’>E¥¿—1Q¿¢*)¾ç©–¿GSµ¿›JÔ¾ \ No newline at end of 
file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_w.bin deleted file mode 100644 index 2f27e0df6f33191954029c4586e856cd8521af10..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_21_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_b.bin deleted file mode 100644 index 66f62621d56c6d4713819980a3fd8511c6a01dcf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_b.bin +++ /dev/null @@ -1 +0,0 @@ -ç‹=߇˜>Ä@Y>žÁ9>âË»Sé>“4W>Ó€"=.ÊU½l‘¬>¬ÅÕ>!ì=\¿V>µ¤>2ô¼ eŽ¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_w.bin deleted file mode 100644 index 9f03691a7faef92691bc8761bd89cd62bda7b516..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_b.bin deleted file mode 100644 index 7e9cb4e6ec2336a8230354a1ff90f45bf2f29d66..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_b.bin +++ /dev/null @@ -1 +0,0 @@ -¹¥¶=MÖt¾À,J>Ê;\¿éX‹½4û;=–šF>«+¾i¬¦½Õ¹ß=G¹=Ø)3½–c&½>bμž±>}¬°½ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_w.bin deleted file mode 100644 index 
5eaf436d696ce8a262beb897bf87732d19a61f54..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_b.bin deleted file mode 100644 index 9c979c9d8997c551b5121a77587a6b014c8dbd2e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_b.bin +++ /dev/null @@ -1 +0,0 @@ -Äa>½`‹>!Æ½ß >¾¶½}K›>!Ã3¿õc>vLN>ÊU½Ö!>ÍÙ5>U]¾`þ¸¾úÈ=lq¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_w.bin deleted file mode 100644 index d5ef72fda73527d7c94a087005a2ebeac75e96ca..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_b.bin deleted file mode 100644 index c116be8c273f8abd1f40a7b446bffcb14f74df24..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_b.bin +++ /dev/null @@ -1 +0,0 @@ -”-¾‰óݽDb½DF½œX“½¦ÑŠ½l®¡½Ï•S¾[6>œÃ–½Ï¤.½PÖÜ<\mN½ÓrI½æX™½êG¢¼ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_w.bin deleted file mode 100644 index 687e514cbff23bf4c4ee12f5068c5ee081cf7fd0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_b.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_b.bin deleted file mode 100644 index a00bc8b1ffc5ca67d5aa827e9d74b34123e2f483..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_b.bin +++ /dev/null @@ -1 +0,0 @@ -iA¾vÿ=‡<¿™ˆ•=ÉÎG¿šö&¿žVò½°ñ½'<¿ìûw½ÕÍQ¿¥`Å>óÁ½¼t¾â¨¾=7UK= \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_w.bin deleted file mode 100644 index a5db3c15d9175bb5473981b4c0bbe58bce141835..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_b.bin deleted file mode 100644 index 201aea2f09a83bf5ad755304aa63342a036b659d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_b.bin +++ /dev/null @@ -1 +0,0 @@ -A8µ½ ˆ¼IÂr¾÷·*¼¥¾“½ m¶¾ Y¾Že>Í޽ܺ¾Š(ü¼y.6¾Ð‹lºÏ̽ ž¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_w.bin deleted file mode 100644 index 870a5719406d392aaaaef8ce38a126ed03cafd9a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_7_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_b.bin deleted file mode 100644 index 0fb9f0830c7ffe96255354de4d9e5b0378dfb6a6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_b.bin +++ /dev/null @@ 
-1 +0,0 @@ -Øwã>ëÒé>Ý4F?ýÜ;=Ô\¾‚\ô<[fõ¼¶Ðè»w@-¾2C¹½ƒßk>Gã½ê ½#t6>Ô¼=Ø9×®=Sï¾>œ6?Õ@ >aý>é?}#’>Ñ8Ç>Y«¬<UÀ>A ¾ªð—¾ìª=FR¸½ãI>D> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_w.bin deleted file mode 100644 index 41495fea2fc429d0d9bf3f8afc9ff9879d35618d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_8_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_b.bin deleted file mode 100644 index dbdb7c37b64e8e16a6ea093d4d6e4f1af8258610..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_w.bin deleted file mode 100644 index c5618b47d9bd4417cd36d48d9288759a8f955477..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/conv2d_9_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_b.bin deleted file mode 100644 index 5df6e08172e8ad71bf72131d8e7afa8389c77514..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_b.bin +++ /dev/null @@ -1 +0,0 @@ -×Ö¦½Óü¾ôeŽ¾ƒ‘>[>?°Ã¾„ƒ:>Aù½>n‰>>¸> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_w.bin deleted file mode 100644 index 
eb7245db1d3e84e47b1a2eeb3e35126cfdf2764c..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/dense_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/input.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/input.bin deleted file mode 100644 index 7426b7222304d2a59fb7619761969b70bcc5d2c4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels.bin deleted file mode 100644 index d386ac9dde8313657aac92e874fe25a36277bd86..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels32.bin deleted file mode 100644 index c501aed519a8c7d79c189e34735deb2c09f24d75..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/promise_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/promise_src.cc deleted file mode 100644 index f6e7e32153a5e89a68798f809cf4166285b408ea..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/promise_src.cc +++ /dev/null @@ -1,162 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - 
-llvm_hpvm_initTensorRt(0); - - - -std::string dir_prefix = std::string("resnet18_cifar10_promise/"); -std::string input_path = dir_prefix + std::string("input.bin"); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -std::string labels_path = dir_prefix + std::string("labels.bin"); -uint8_t* labels = readLabels(labels_path.c_str(),10000); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = 
readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* 
conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); -void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); -void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); -std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); -void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); -void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); -std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); -void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); -void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); -void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); -void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); -void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_19_w_path = dir_prefix + 
std::string("conv2d_19_w.bin"); -void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); -void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); -void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); -void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); -void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); -void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -1.0248864, 1.2929907, conv2d_1_b, -0.36291853, 0.2533059, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.9356618, 9); -void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 2.9356618, conv2d_2_w, -0.69884616, 0.71849966, conv2d_2_b, -0.2781147, 0.45571187, 1, 1, 1, 1, -1, 0, 1, 0.0, 4.0425158, 9); -void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.0425158, conv2d_3_w, -0.59568167, 0.7714691, conv2d_3_b, -0.8602873, 0.19743633, 1, 1, 1, 1, -1, 0, -1, -10.203314, 9.055045, 9); -void* var_3 = tensorAdd(var_0, var_2); -void* var_4 = tensorRelu(var_3); -void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 9.734258, conv2d_4_w, -0.41976976, 0.43748936, conv2d_4_b, -0.7021962, 0.3033103, 1, 1, 1, 1, -1, 0, 1, 0.0, 7.0053105, 9); -void* var_6 = 
ConvLayer_PROMISE(var_5, 0.0, 7.0053105, conv2d_5_w, -0.46757826, 0.4635873, conv2d_5_b, -0.20662616, 0.1778044, 1, 1, 1, 1, -1, 0, -1, -4.8778534, 6.7311873, 9); -void* var_7 = tensorAdd(var_4, var_6); -void* var_8 = tensorRelu(var_7); -void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 10.858562, conv2d_6_w, -0.64404047, 0.45383143, conv2d_6_b, -0.819547, 0.38550296, 1, 1, 1, 1, -1, 0, 1, 0.0, 8.843336, 9); -void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 8.843336, conv2d_7_w, -0.41986948, 0.33654243, conv2d_7_b, -0.3563013, 0.22371122, 1, 1, 1, 1, -1, 0, -1, -10.204111, 5.4952374, 9); -void* var_11 = tensorAdd(var_8, var_10); -void* var_12 = tensorRelu(var_11); -void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 11.359337, conv2d_8_w, -0.4805263, 0.50655717, conv2d_8_b, -0.296758, 0.7742441, 1, 1, 2, 2, -1, 0, 1, 0.0, 19.303282, 9); -void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 19.303282, conv2d_9_w, -0.52083415, 0.45517674, conv2d_9_b, -0.20242067, 0.8236838, 1, 1, 1, 1, -1, 0, -1, -24.404322, 24.37361, 9); -void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 11.359337, conv2d_10_w, -0.5338656, 1.3395424, conv2d_10_b, -0.20242067, 0.8236838, 0, 0, 2, 2, -1, 0, -1, -6.497986, 12.8968935, 9); -void* var_16 = tensorAdd(var_15, var_14); -void* var_17 = tensorRelu(var_16); -void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 29.462997, conv2d_11_w, -0.34429058, 0.43629733, conv2d_11_b, -1.0744808, 0.056708273, 1, 1, 1, 1, -1, 0, 1, 0.0, 24.329395, 9); -void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 24.329395, conv2d_12_w, -0.30342352, 0.39493486, conv2d_12_b, -0.44630566, 0.6492069, 1, 1, 1, 1, -1, 0, -1, -9.780206, 20.320444, 9); -void* var_20 = tensorAdd(var_17, var_19); -void* var_21 = tensorRelu(var_20); -void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 29.647404, conv2d_13_w, -0.38351893, 0.45775774, conv2d_13_b, -1.4733055, -0.014426912, 1, 1, 1, 1, -1, 0, 1, 0.0, 25.600573, 9); -void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 25.600573, conv2d_14_w, -0.25695276, 0.45372736, conv2d_14_b, 
-0.5259744, 0.26591402, 1, 1, 1, 1, -1, 0, -1, -10.179335, 27.598986, 9); -void* var_24 = tensorAdd(var_21, var_23); -void* var_25 = tensorRelu(var_24); -void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 42.450073, conv2d_15_w, -0.55299705, 0.5443531, conv2d_15_b, -0.71790683, 1.2730768, 1, 1, 2, 2, -1, 0, 1, 0.0, 48.943645, 9); -void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 48.943645, conv2d_16_w, -0.4203967, 0.48641303, conv2d_16_b, -0.90653443, 1.3546854, 1, 1, 1, 1, -1, 0, -1, -75.016396, 112.3873, 9); -void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 42.450073, conv2d_17_w, -0.4365755, 0.84913826, conv2d_17_b, -0.90653443, 1.3546851, 0, 0, 2, 2, -1, 0, -1, -13.93596, 24.614315, 9); -void* var_29 = tensorAdd(var_28, var_27); -void* var_30 = tensorRelu(var_29); -void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 126.758545, conv2d_18_w, -0.38657624, 0.5228989, conv2d_18_b, -1.2083547, 0.76361173, 1, 1, 1, 1, -1, 0, 1, 0.0, 130.02768, 9); -void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 130.02768, conv2d_19_w, -0.40857902, 0.575035, conv2d_19_b, -1.8731614, 1.0960501, 1, 1, 1, 1, -1, 0, -1, -97.91927, 130.89008, 9); -void* var_33 = tensorAdd(var_30, var_32); -void* var_34 = tensorRelu(var_33); -void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 220.4094, conv2d_20_w, -0.33079496, 0.5893278, conv2d_20_b, -1.0234511, 1.0016295, 1, 1, 1, 1, -1, 0, 1, 0.0, 268.19254, 9); -void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 268.19254, conv2d_21_w, -0.27897888, 0.38280907, conv2d_21_b, -2.2086356, 1.0066502, 1, 1, 1, 1, -1, 0, -1, -235.08034, 264.33008, 9); -void* var_37 = tensorAdd(var_34, var_36); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); -void* var_40 = FCLayer_PROMISE(var_39, 0.0, 39.821262, dense_1_w, -1.5092047, 1.0279838, dense_1_b, -0.49379802, 0.61032647, -1, -84.49565, 60.312202, 9); -void* var_41 = tensorSoftmax(var_40); - -computeAccuracy2(labels,10000,var_41); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt deleted file mode 100644 index af0279b1d2980d8c8d71f20f3ef8c3f3da585699..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt +++ /dev/null @@ -1,22 +0,0 @@ --0.5500815 0.60786617 -1.0248864 1.2929907 -0.36291853 0.2533059 0.0 0.753551840782 -0.0 0.753551840782 -0.69884616 0.71849966 -0.2781147 0.45571187 0.0 1.01057458043 -0.0 1.01057458043 -0.59568167 0.7714691 -0.8602873 0.19743633 -1.84771883726 1.87930787086 -0.0 2.33981014252 -0.41976976 0.43748936 -0.7021962 0.3033103 0.0 1.04317724705 -0.0 1.04317724705 -0.46757826 0.4635873 -0.20662616 0.1778044 -0.829483509064 0.786805033684 -0.0 2.49733686686 -0.64404047 0.45383143 -0.819547 0.38550296 0.0 0.897360802293 -0.0 0.897360802293 -0.41986948 0.33654243 -0.3563013 0.22371122 -0.957150224447 0.54919362247 -0.0 2.37362146616 -0.4805263 0.50655717 -0.296758 0.7742441 0.0 3.01592136621 -0.0 3.01592136621 -0.52083415 0.45517674 -0.20242067 0.8236838 -5.2759475708 5.79733039856 -0.0 2.37362146616 -0.5338656 1.3395424 -0.20242067 0.8236838 -0.738995380998 2.33600783587 -0.0 7.07933432579 -0.34429058 0.43629733 -1.0744808 0.056708273 0.0 1.58645607233 -0.0 1.58645607233 -0.30342352 0.39493486 -0.44630566 0.6492069 -1.49672914267 1.29970229745 -0.0 7.11914063454 -0.38351893 0.45775774 -1.4733055 -0.014426912 0.0 1.52876508832 -0.0 1.52876508832 -0.25695276 0.45372736 -0.5259744 0.26591402 -1.59576894164 1.08074297309 -0.0 6.94405080318 -0.55299705 0.5443531 -0.71790683 1.2730768 0.0 10.3651468277 -0.0 10.3651468277 -0.4203967 0.48641303 -0.90653443 1.3546854 -22.372925148 17.2033731079 -0.0 6.94405080318 -0.4365755 0.84913826 -0.90653443 1.3546851 -3.66810325861 4.87814051151 -0.0 18.8401451111 -0.38657624 0.5228989 -1.2083547 0.76361173 0.0 19.1229192352 -0.0 19.1229192352 -0.40857902 0.575035 
-1.8731614 1.0960501 -31.3229312897 14.8234729958 -0.0 23.7382488823 -0.33079496 0.5893278 -1.0234511 1.0016295 0.0 19.5892774963 -0.0 19.5892774963 -0.27897888 0.38280907 -2.2086356 1.0066502 -34.4416886902 20.9890329933 -0.0 10.8541981602 -1.5092047 1.0279838 -0.49379802 0.61032647 -40.9121678543 25.7082381058 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/src.cc deleted file mode 100644 index e82c54651aaa8b8df2ab34b65490aa79b730c327..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/src.cc +++ /dev/null @@ -1,193 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("resnet18_cifar10_promise/"); -std::string input_path = dir_prefix + std::string("input.bin"); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -std::string labels_path = dir_prefix + std::string("labels.bin"); -uint8_t* labels = readLabels(labels_path.c_str(),10000); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* 
conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* 
conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); -void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); -void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); -std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); -void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_17_w_path = dir_prefix + 
std::string("conv2d_17_w.bin"); -void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); -std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); -void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); -void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); -void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); -void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); -void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); -void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); -void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); -void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); -void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); -void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); -void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); -std::string 
dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); -void* var_3 = tensorAdd(var_2, conv2d_1_b); -void* var_4 = tensorRelu(var_3); -void* var_6 = tensorConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); -void* var_7 = tensorAdd(var_6, conv2d_2_b); -void* var_8 = tensorRelu(var_7); -void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); -void* var_11 = tensorAdd(var_10, conv2d_3_b); -void* var_12 = tensorAdd(var_4, var_11); -void* var_13 = tensorRelu(var_12); -void* var_15 = tensorConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); -void* var_16 = tensorAdd(var_15, conv2d_4_b); -void* var_17 = tensorRelu(var_16); -void* var_19 = tensorConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); -void* var_20 = tensorAdd(var_19, conv2d_5_b); -void* var_21 = tensorAdd(var_13, var_20); -void* var_22 = tensorRelu(var_21); -void* var_24 = tensorConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); -void* var_25 = tensorAdd(var_24, conv2d_6_b); -void* var_26 = tensorRelu(var_25); -void* var_28 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); -void* var_29 = tensorAdd(var_28, conv2d_7_b); -void* var_30 = tensorAdd(var_22, var_29); -void* var_31 = tensorRelu(var_30); -void* var_33 = tensorConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); -void* var_34 = tensorAdd(var_33, conv2d_8_b); -void* var_35 = tensorRelu(var_34); -void* var_37 = tensorConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); -void* var_38 = tensorAdd(var_37, conv2d_9_b); -void* var_40 = tensorConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); -void* var_41 = tensorAdd(var_40, conv2d_10_b); -void* var_42 = tensorAdd(var_41, var_38); -void* var_43 = tensorRelu(var_42); -void* var_45 = tensorConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); -void* var_46 = tensorAdd(var_45, conv2d_11_b); -void* var_47 = tensorRelu(var_46); -void* var_49 = 
tensorConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); -void* var_50 = tensorAdd(var_49, conv2d_12_b); -void* var_51 = tensorAdd(var_43, var_50); -void* var_52 = tensorRelu(var_51); -void* var_54 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); -void* var_55 = tensorAdd(var_54, conv2d_13_b); -void* var_56 = tensorRelu(var_55); -void* var_58 = tensorConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); -void* var_59 = tensorAdd(var_58, conv2d_14_b); -void* var_60 = tensorAdd(var_52, var_59); -void* var_61 = tensorRelu(var_60); -void* var_63 = tensorConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); -void* var_64 = tensorAdd(var_63, conv2d_15_b); -void* var_65 = tensorRelu(var_64); -void* var_67 = tensorConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); -void* var_68 = tensorAdd(var_67, conv2d_16_b); -void* var_70 = tensorConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); -void* var_71 = tensorAdd(var_70, conv2d_17_b); -void* var_72 = tensorAdd(var_71, var_68); -void* var_73 = tensorRelu(var_72); -void* var_75 = tensorConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); -void* var_76 = tensorAdd(var_75, conv2d_18_b); -void* var_77 = tensorRelu(var_76); -void* var_79 = tensorConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); -void* var_80 = tensorAdd(var_79, conv2d_19_b); -void* var_81 = tensorAdd(var_73, var_80); -void* var_82 = tensorRelu(var_81); -void* var_84 = tensorConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); -void* var_85 = tensorAdd(var_84, conv2d_20_b); -void* var_86 = tensorRelu(var_85); -void* var_88 = tensorConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); -void* var_89 = tensorAdd(var_88, conv2d_21_b); -void* var_90 = tensorAdd(var_82, var_89); -void* var_91 = tensorRelu(var_90); -void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); -void* var_94 = tensorGemmGPU(var_92, dense_1_w); -void* var_95 = tensorAdd(var_94, dense_1_b); -void* var_96 = tensorSoftmax(var_95); - -computeAccuracy2(labels,10000,var_96); - 
-llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_b.bin deleted file mode 100644 index 5c1018acfb9bced92638c8ec85b8b2637c525f89..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_w.bin deleted file mode 100644 index 9741a6c0ab1be107d7777e8ba3cefba61a58ea13..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_10_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_b.bin deleted file mode 100644 index 2103a27bb8f5e9dc4799f435939c99b9e5d27c63..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_w.bin deleted file mode 100644 index 3af50120b4e5febeb17d85e1ab6636ea4aff68ce..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_11_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_b.bin deleted file mode 100644 index 86d09c41cef5bfbc2776901092db0227e7dfe318..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_b.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_w.bin deleted file mode 100644 index 9307262357575ab5bc973cc99889637055e90841..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_12_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_b.bin deleted file mode 100644 index 43788cb58d1d59d23fab0329c55cf1ec442ae1bd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_w.bin deleted file mode 100644 index 1935a50645b8420a3c6767692f60ef57c685074a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_13_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_b.bin deleted file mode 100644 index bd4b82494cab28510ff7fc55f565ece1bfb8f7fb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_w.bin deleted file mode 100644 index eb883ee6024240b14215ef0e9df3152fe35c99f3..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_b.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_b.bin deleted file mode 100644 index 93bb77192d28c7fcf6f92e0ffaab9637b763df28..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_b.bin +++ /dev/null @@ -1 +0,0 @@ -!Ü=!J¹<«¾j>ÓOä=ŸÐî:²_¾Dr=~>Y«=Ö>³½iÛ½ñ3½,3¾ŠÛ>ÇÛ=¸¾uŒ>ž\<Z7=Z§¬=''6¾o¨;Ñ»õ¾à¡t¾©¾ òG½àÓ¨¾¥å¼ôÿ]¾,O¨=¶’<»lÏ=¥Ò">¡'Œ½ÃŠ=®¤T¾Z==2éÏ=üjë½ûHF¿õ¾“œ¼£}=³¬¾ßä¼Ó°”>ÛïQ?VÍ<; =8ñ‰½‹xr¾¢°}<ÿz>îšG¾{“½ax6¾¶Î‚»$Ôë¾DpÎ=Š|5¾Á2>¤tY>ä©= \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_w.bin deleted file mode 100644 index 9f8047bf248a0c79004f352b1610d6c1dd13c019..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_b.bin deleted file mode 100644 index d1c8245fc8ac5933fac0a89cfa5d6213e819410a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_w.bin deleted file mode 100644 index 04e97f2c791ac1a52d93382f2ffe7235b6fa6806..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_b.bin deleted file mode 100644 index cafc8138078aed273e04d971d32ef9200e6fcdad..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_w.bin deleted file mode 100644 index 8e8962fc8523eea064719fdab39481c344f48428..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_b.bin deleted file mode 100644 index 29d07bfcd1265fbc102a0e449da2de74732178ff..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_w.bin deleted file mode 100644 index 32b5bf0f4567e5eeb4f542abcb67a2284cff3180..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_b.bin deleted file mode 100644 index 5b2878eed5c2e9b95608ba3215fa25cdde175d18..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_w.bin deleted file mode 100644 index 74924b2c639f16c230f167b47c1a9943fbc6a439..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_6_w.bin and /dev/null differ diff --git 
a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_b.bin deleted file mode 100644 index ab645f86d150c69fd1038d0ae687d8947b0baa71..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_w.bin deleted file mode 100644 index 17309bf27f0fdcf360c42d106e6396670ce46b9e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_7_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_b.bin deleted file mode 100644 index a95ca5bfec74e45598866ee6271818adb91115e5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_w.bin deleted file mode 100644 index cd973995acba0f7f87668ccaba198b6b19b8f5f9..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_8_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_b.bin deleted file mode 100644 index 2eeb9f1daac2d414b251a3696aea4d8847159b6b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_w.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_w.bin deleted file mode 100644 index d34c4d2b9ab1f28e16392da758ef2cfc4ab56359..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/conv2d_9_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_b.bin deleted file mode 100644 index 894c13f1e61a964e0490904d48c7ee6aea3a82d4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_w.bin deleted file mode 100644 index a45c830ca91e2bbf995d447edaa308d4c7261ea5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_b.bin deleted file mode 100644 index dc02631634718d4b6716876538380cc0596a2ef6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_b.bin +++ /dev/null @@ -1 +0,0 @@ - Sî>:Ä}¾õ…>ºpž?ñ‡¿’ß²>Vâ–¿Q0å<qö>Ò/“< \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_w.bin deleted file mode 100644 index 05f5c5cc9c95e171f54836b452babbb48be7ab08..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/dense_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/input.bin 
b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/input.bin deleted file mode 100644 index d500ac2cdaf78b2ab0e51eb9f8d89174247e52d5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels.bin deleted file mode 100644 index 4451911edf1afe4b0bc792730fbca3d4141d0c50..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels32.bin deleted file mode 100644 index bf2090756b593142d0575f82bb52bdcd7bfee6d8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt deleted file mode 100644 index b742502f145c535db5432c0f6a0de27ba3ed3979..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt +++ /dev/null @@ -1,15 +0,0 @@ --1.8816367 2.0934217 -0.53275156 0.49437004 -0.6403629 0.2490165 0.0 1.35908746719 -0.0 1.35908746719 -0.2688396 0.20639156 -0.7745511 0.82006615 0.0 2.52123117924 -0.0 2.52123117924 -0.16776876 0.14878987 -0.35283303 0.5154362 0.0 1.20119857848 -0.0 1.20119857848 -0.088948585 0.114222586 -0.30250227 0.36856708 0.0 1.03598809302 -0.0 1.03598809302 -0.07739562 0.10973293 -0.15568458 0.17634983 0.0 0.300495595038 -0.0 0.300495595038 -0.051649556 0.05435231 -0.07395447 0.07996062 0.0 0.11490475405 -0.0 0.11490475405 -0.043513633 0.07577866 -0.06921874 0.02660573 0.0 0.16232508488 -0.0 
0.16232508488 -0.033842053 0.045218028 -0.022827804 0.023845317 0.0 0.124249965735 -0.0 0.124249965735 -0.02211613 0.032084666 -0.02699063 0.03773564 0.0 0.174634486511 -0.0 0.174634486511 -0.01979376 0.034854397 -0.036107242 0.07056531 0.0 0.575175762177 -0.0 0.575175762177 -0.03452098 0.046055835 -0.051925894 0.07039055 0.0 0.771875114441 -0.0 0.771875114441 -0.025946895 0.040090334 -0.06049362 0.12658806 0.0 1.17285169065 -0.0 1.17285169065 -0.021766115 0.03315237 -0.20705001 0.117947325 0.0 2.00157693863 -0.0 2.00157693863 -0.042597745 0.046707444 -0.21937433 0.2545502 0.0 2.00236111879 -0.0 2.00236111879 -0.32550547 0.30829763 -1.1787822 1.2378151 -18.2514705467 24.1736344528 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/src.cc deleted file mode 100644 index 44179ee9f39c9547270b45fc84249835350bee5f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/src.cc +++ /dev/null @@ -1,141 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("vgg16_cifar10/"); -std::string input_path = dir_prefix + std::string("input.bin"); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -std::string labels_path = dir_prefix + std::string("labels.bin"); -uint8_t* labels = readLabels(labels_path.c_str(),10000); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = 
dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = 
dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - -void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); -void* var_1 = tensorAdd(var_0, conv2d_1_b); -void* var_2 = tensorRelu(var_1); -void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); -void* var_5 = tensorAdd(var_4, conv2d_2_b); -void* var_6 = tensorRelu(var_5); -void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); -void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); -void* var_9 = tensorAdd(var_8, conv2d_3_b); -void* var_10 = tensorRelu(var_9); -void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); -void* var_13 = tensorAdd(var_12, conv2d_4_b); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); -void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); -void* var_17 = tensorAdd(var_16, conv2d_5_b); -void* var_18 = tensorRelu(var_17); -void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); -void* var_21 = tensorAdd(var_20, conv2d_6_b); -void* var_22 = tensorRelu(var_21); -void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); -void* var_25 = tensorAdd(var_24, conv2d_7_b); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); -void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); -void* var_29 = tensorAdd(var_28, conv2d_8_b); -void* var_30 = tensorRelu(var_29); -void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); -void* var_33 = tensorAdd(var_32, conv2d_9_b); -void* var_34 = tensorRelu(var_33); -void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); -void* var_37 = tensorAdd(var_36, conv2d_10_b); -void* var_38 = 
tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); -void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); -void* var_41 = tensorAdd(var_40, conv2d_11_b); -void* var_42 = tensorRelu(var_41); -void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); -void* var_45 = tensorAdd(var_44, conv2d_12_b); -void* var_46 = tensorRelu(var_45); -void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); -void* var_49 = tensorAdd(var_48, conv2d_13_b); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); -void* var_54 = tensorGemmGPU(var_51, dense_1_w); -void* var_55 = tensorAdd(var_54, dense_1_b); -void* var_56 = tensorRelu(var_55); -void* var_58 = tensorGemmGPU(var_56, dense_2_w); -void* var_59 = tensorAdd(var_58, dense_2_b); -void* var_60 = tensorSoftmax(var_59); - -computeAccuracy2(labels,10000,var_60); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_cifar_calib.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_cifar_calib.bin deleted file mode 100644 index 43bc1e5b985604c5a17fe67d2db4fec82e12042d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_cifar_calib.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_train_labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_train_labels.bin deleted file mode 100644 index 9be730fd6f397987a6948a8d9196c7e156675d1b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/vgg16_train_labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/approxhpvm_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/approxhpvm_src.cc deleted file mode 100644 index 
8084e3723a6141ac0e99729b8455bcc529ac7a0f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/approxhpvm_src.cc +++ /dev/null @@ -1,982 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, size_t bytes_t1, 
void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void 
var_16_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_17_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_19_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_20_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_22_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_23_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} 
- -void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_26_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_29_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_32_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_33_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void 
var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_35_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_36_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_38_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_39_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_41_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_42_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, 
(size_t) 0); -} - -void var_43_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_44_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_46_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_47_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_49_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* conv2d_1_b, size_t conv2d_1_b_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* conv2d_2_b, size_t conv2d_2_b_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* conv2d_3_b, size_t conv2d_3_b_bytes, - void* conv2d_4_w, size_t conv2d_4_w_bytes, - void* conv2d_4_b, size_t conv2d_4_b_bytes, - void* conv2d_5_w, size_t conv2d_5_w_bytes, - 
void* conv2d_5_b, size_t conv2d_5_b_bytes, - void* conv2d_6_w, size_t conv2d_6_w_bytes, - void* conv2d_6_b, size_t conv2d_6_b_bytes, - void* conv2d_7_w, size_t conv2d_7_w_bytes, - void* conv2d_7_b, size_t conv2d_7_b_bytes, - void* conv2d_8_w, size_t conv2d_8_w_bytes, - void* conv2d_8_b, size_t conv2d_8_b_bytes, - void* conv2d_9_w, size_t conv2d_9_w_bytes, - void* conv2d_9_b, size_t conv2d_9_b_bytes, - void* conv2d_10_w, size_t conv2d_10_w_bytes, - void* conv2d_10_b, size_t conv2d_10_b_bytes, - void* conv2d_11_w, size_t conv2d_11_w_bytes, - void* conv2d_11_b, size_t conv2d_11_b_bytes, - void* conv2d_12_w, size_t conv2d_12_w_bytes, - void* conv2d_12_b, size_t conv2d_12_b_bytes, - void* conv2d_13_w, size_t conv2d_13_w_bytes, - void* conv2d_13_b, size_t conv2d_13_b_bytes, - void* dense_1_w, size_t dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes, - void* dense_2_w, size_t dense_2_w_bytes, - void* dense_2_b, size_t dense_2_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(31, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, conv2d_6_w, conv2d_6_b, conv2d_7_w, conv2d_7_b, conv2d_8_w, conv2d_8_b, conv2d_9_w, conv2d_9_b, conv2d_10_w, conv2d_10_b, conv2d_11_w, conv2d_11_b, conv2d_12_w, conv2d_12_b, conv2d_13_w, conv2d_13_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - 
__visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - __visc__bindIn(var_3, 6, 2, 0); - __visc__bindIn(var_3, 7, 3, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 8, 2, 0); - __visc__bindIn(var_4, 9, 3, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - __visc__bindIn(var_7, 10, 2, 0); - __visc__bindIn(var_7, 11, 3, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 12, 2, 0); - __visc__bindIn(var_8, 13, 3, 0); - - void* var_9 = __visc__createNodeND(0, var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - __visc__bindIn(var_10, 14, 2, 0); - __visc__bindIn(var_10, 15, 3, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 16, 2, 0); - __visc__bindIn(var_11, 17, 3, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - 
__visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 18, 2, 0); - __visc__bindIn(var_14, 19, 3, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 20, 2, 0); - __visc__bindIn(var_15, 21, 3, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - - void* var_17 = __visc__createNodeND(0, var_17_node); - - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); - __visc__bindIn(var_17, 22, 2, 0); - __visc__bindIn(var_17, 23, 3, 0); - - void* var_18 = __visc__createNodeND(0, var_18_node); - - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 24, 2, 0); - __visc__bindIn(var_18, 25, 3, 0); - - void* var_19 = __visc__createNodeND(0, var_19_node); - - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - - void* var_20 = __visc__createNodeND(0, var_20_node); - - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); - __visc__bindIn(var_20, 26, 2, 0); - __visc__bindIn(var_20, 27, 3, 0); - - void* var_21 = __visc__createNodeND(0, var_21_node); - - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 28, 2, 0); - __visc__bindIn(var_21, 29, 3, 0); - - void* var_22 = __visc__createNodeND(0, var_22_node); - - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - - void* var_23 = __visc__createNodeND(0, var_23_node); - - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); - - void* var_24 = __visc__createNodeND(0, var_24_node); - - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - 
__visc__bindIn(var_24, 30, 2, 0); - __visc__bindIn(var_24, 31, 3, 0); - - void* var_25 = __visc__createNodeND(0, var_25_node); - - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 32, 2, 0); - __visc__bindIn(var_25, 33, 3, 0); - - void* var_26 = __visc__createNodeND(0, var_26_node); - - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); - - void* var_27 = __visc__createNodeND(0, var_27_node); - - __visc__edge(var_26, var_27, 1, 0, 0, 0); - __visc__edge(var_26, var_27, 1, 1, 1, 0); - __visc__bindIn(var_27, 34, 2, 0); - __visc__bindIn(var_27, 35, 3, 0); - - void* var_28 = __visc__createNodeND(0, var_28_node); - - __visc__edge(var_27, var_28, 1, 0, 0, 0); - __visc__edge(var_27, var_28, 1, 1, 1, 0); - __visc__bindIn(var_28, 36, 2, 0); - __visc__bindIn(var_28, 37, 3, 0); - - void* var_29 = __visc__createNodeND(0, var_29_node); - - __visc__edge(var_28, var_29, 1, 0, 0, 0); - __visc__edge(var_28, var_29, 1, 1, 1, 0); - - void* var_30 = __visc__createNodeND(0, var_30_node); - - __visc__edge(var_29, var_30, 1, 0, 0, 0); - __visc__edge(var_29, var_30, 1, 1, 1, 0); - __visc__bindIn(var_30, 38, 2, 0); - __visc__bindIn(var_30, 39, 3, 0); - - void* var_31 = __visc__createNodeND(0, var_31_node); - - __visc__edge(var_30, var_31, 1, 0, 0, 0); - __visc__edge(var_30, var_31, 1, 1, 1, 0); - __visc__bindIn(var_31, 40, 2, 0); - __visc__bindIn(var_31, 41, 3, 0); - - void* var_32 = __visc__createNodeND(0, var_32_node); - - __visc__edge(var_31, var_32, 1, 0, 0, 0); - __visc__edge(var_31, var_32, 1, 1, 1, 0); - - void* var_33 = __visc__createNodeND(0, var_33_node); - - __visc__edge(var_32, var_33, 1, 0, 0, 0); - __visc__edge(var_32, var_33, 1, 1, 1, 0); - - void* var_34 = __visc__createNodeND(0, var_34_node); - - __visc__edge(var_33, var_34, 1, 0, 0, 0); - __visc__edge(var_33, var_34, 1, 1, 1, 0); - __visc__bindIn(var_34, 42, 2, 0); - __visc__bindIn(var_34, 43, 3, 0); - - void* var_35 = 
__visc__createNodeND(0, var_35_node); - - __visc__edge(var_34, var_35, 1, 0, 0, 0); - __visc__edge(var_34, var_35, 1, 1, 1, 0); - __visc__bindIn(var_35, 44, 2, 0); - __visc__bindIn(var_35, 45, 3, 0); - - void* var_36 = __visc__createNodeND(0, var_36_node); - - __visc__edge(var_35, var_36, 1, 0, 0, 0); - __visc__edge(var_35, var_36, 1, 1, 1, 0); - - void* var_37 = __visc__createNodeND(0, var_37_node); - - __visc__edge(var_36, var_37, 1, 0, 0, 0); - __visc__edge(var_36, var_37, 1, 1, 1, 0); - __visc__bindIn(var_37, 46, 2, 0); - __visc__bindIn(var_37, 47, 3, 0); - - void* var_38 = __visc__createNodeND(0, var_38_node); - - __visc__edge(var_37, var_38, 1, 0, 0, 0); - __visc__edge(var_37, var_38, 1, 1, 1, 0); - __visc__bindIn(var_38, 48, 2, 0); - __visc__bindIn(var_38, 49, 3, 0); - - void* var_39 = __visc__createNodeND(0, var_39_node); - - __visc__edge(var_38, var_39, 1, 0, 0, 0); - __visc__edge(var_38, var_39, 1, 1, 1, 0); - - void* var_40 = __visc__createNodeND(0, var_40_node); - - __visc__edge(var_39, var_40, 1, 0, 0, 0); - __visc__edge(var_39, var_40, 1, 1, 1, 0); - __visc__bindIn(var_40, 50, 2, 0); - __visc__bindIn(var_40, 51, 3, 0); - - void* var_41 = __visc__createNodeND(0, var_41_node); - - __visc__edge(var_40, var_41, 1, 0, 0, 0); - __visc__edge(var_40, var_41, 1, 1, 1, 0); - __visc__bindIn(var_41, 52, 2, 0); - __visc__bindIn(var_41, 53, 3, 0); - - void* var_42 = __visc__createNodeND(0, var_42_node); - - __visc__edge(var_41, var_42, 1, 0, 0, 0); - __visc__edge(var_41, var_42, 1, 1, 1, 0); - - void* var_43 = __visc__createNodeND(0, var_43_node); - - __visc__edge(var_42, var_43, 1, 0, 0, 0); - __visc__edge(var_42, var_43, 1, 1, 1, 0); - - void* var_44 = __visc__createNodeND(0, var_44_node); - - __visc__edge(var_43, var_44, 1, 0, 0, 0); - __visc__edge(var_43, var_44, 1, 1, 1, 0); - __visc__bindIn(var_44, 54, 2, 0); - __visc__bindIn(var_44, 55, 3, 0); - - void* var_45 = __visc__createNodeND(0, var_45_node); - - __visc__edge(var_44, var_45, 1, 0, 0, 0); - 
__visc__edge(var_44, var_45, 1, 1, 1, 0); - __visc__bindIn(var_45, 56, 2, 0); - __visc__bindIn(var_45, 57, 3, 0); - - void* var_46 = __visc__createNodeND(0, var_46_node); - - __visc__edge(var_45, var_46, 1, 0, 0, 0); - __visc__edge(var_45, var_46, 1, 1, 1, 0); - - void* var_47 = __visc__createNodeND(0, var_47_node); - - __visc__edge(var_46, var_47, 1, 0, 0, 0); - __visc__edge(var_46, var_47, 1, 1, 1, 0); - __visc__bindIn(var_47, 58, 2, 0); - __visc__bindIn(var_47, 59, 3, 0); - - void* var_48 = __visc__createNodeND(0, var_48_node); - - __visc__edge(var_47, var_48, 1, 0, 0, 0); - __visc__edge(var_47, var_48, 1, 1, 1, 0); - __visc__bindIn(var_48, 60, 2, 0); - __visc__bindIn(var_48, 61, 3, 0); - - void* var_49 = __visc__createNodeND(0, var_49_node); - - __visc__edge(var_48, var_49, 1, 0, 0, 0); - __visc__edge(var_48, var_49, 1, 1, 1, 0); - - __visc__bindOut(var_49, 0, 0, 0); - __visc__bindOut(var_49, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* conv2d_1_b; - size_t conv2d_1_b_bytes; - void* conv2d_2_w; - size_t conv2d_2_w_bytes; - void* conv2d_2_b; - size_t conv2d_2_b_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* conv2d_3_b; - size_t conv2d_3_b_bytes; - void* conv2d_4_w; - size_t conv2d_4_w_bytes; - void* conv2d_4_b; - size_t conv2d_4_b_bytes; - void* conv2d_5_w; - size_t conv2d_5_w_bytes; - void* conv2d_5_b; - size_t conv2d_5_b_bytes; - void* conv2d_6_w; - size_t conv2d_6_w_bytes; - void* conv2d_6_b; - size_t conv2d_6_b_bytes; - void* conv2d_7_w; - size_t conv2d_7_w_bytes; - void* conv2d_7_b; - size_t conv2d_7_b_bytes; - void* conv2d_8_w; - size_t conv2d_8_w_bytes; - void* conv2d_8_b; - size_t conv2d_8_b_bytes; - void* conv2d_9_w; - size_t conv2d_9_w_bytes; - void* conv2d_9_b; - size_t conv2d_9_b_bytes; - void* conv2d_10_w; - size_t conv2d_10_w_bytes; - void* conv2d_10_b; - size_t 
conv2d_10_b_bytes; - void* conv2d_11_w; - size_t conv2d_11_w_bytes; - void* conv2d_11_b; - size_t conv2d_11_b_bytes; - void* conv2d_12_w; - size_t conv2d_12_w_bytes; - void* conv2d_12_b; - size_t conv2d_12_b_bytes; - void* conv2d_13_w; - size_t conv2d_13_w_bytes; - void* conv2d_13_b; - size_t conv2d_13_b_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - void* dense_2_w; - size_t dense_2_w_bytes; - void* dense_2_b; - size_t dense_2_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - -std::string dir_prefix = std::string("vgg16_cifar100_test/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = 
dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string 
conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); -std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -uint8_t* labels = readLabels(labels_path.c_str(),10000); - -__visc__init(); -RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - -args->input = input; -args->input_bytes = 0; -args->conv2d_1_w = conv2d_1_w; -args->conv2d_1_w_bytes = 0; -args->conv2d_1_b = conv2d_1_b; -args->conv2d_1_b_bytes = 0; -args->conv2d_2_w = conv2d_2_w; -args->conv2d_2_w_bytes = 0; -args->conv2d_2_b = conv2d_2_b; -args->conv2d_2_b_bytes = 0; -args->conv2d_3_w = conv2d_3_w; -args->conv2d_3_w_bytes = 0; -args->conv2d_3_b = conv2d_3_b; -args->conv2d_3_b_bytes = 
0; -args->conv2d_4_w = conv2d_4_w; -args->conv2d_4_w_bytes = 0; -args->conv2d_4_b = conv2d_4_b; -args->conv2d_4_b_bytes = 0; -args->conv2d_5_w = conv2d_5_w; -args->conv2d_5_w_bytes = 0; -args->conv2d_5_b = conv2d_5_b; -args->conv2d_5_b_bytes = 0; -args->conv2d_6_w = conv2d_6_w; -args->conv2d_6_w_bytes = 0; -args->conv2d_6_b = conv2d_6_b; -args->conv2d_6_b_bytes = 0; -args->conv2d_7_w = conv2d_7_w; -args->conv2d_7_w_bytes = 0; -args->conv2d_7_b = conv2d_7_b; -args->conv2d_7_b_bytes = 0; -args->conv2d_8_w = conv2d_8_w; -args->conv2d_8_w_bytes = 0; -args->conv2d_8_b = conv2d_8_b; -args->conv2d_8_b_bytes = 0; -args->conv2d_9_w = conv2d_9_w; -args->conv2d_9_w_bytes = 0; -args->conv2d_9_b = conv2d_9_b; -args->conv2d_9_b_bytes = 0; -args->conv2d_10_w = conv2d_10_w; -args->conv2d_10_w_bytes = 0; -args->conv2d_10_b = conv2d_10_b; -args->conv2d_10_b_bytes = 0; -args->conv2d_11_w = conv2d_11_w; -args->conv2d_11_w_bytes = 0; -args->conv2d_11_b = conv2d_11_b; -args->conv2d_11_b_bytes = 0; -args->conv2d_12_w = conv2d_12_w; -args->conv2d_12_w_bytes = 0; -args->conv2d_12_b = conv2d_12_b; -args->conv2d_12_b_bytes = 0; -args->conv2d_13_w = conv2d_13_w; -args->conv2d_13_w_bytes = 0; -args->conv2d_13_b = conv2d_13_b; -args->conv2d_13_b_bytes = 0; -args->dense_1_w = dense_1_w; -args->dense_1_w_bytes = 0; -args->dense_1_b = dense_1_b; -args->dense_1_b_bytes = 0; -args->dense_2_w = dense_2_w; -args->dense_2_w_bytes = 0; -args->dense_2_b = dense_2_b; -args->dense_2_b_bytes = 0; - -void* dfg = __visc__launch(0, root, (void*) args); - -__visc__wait(dfg); - -void *result = static_cast<RootIn*>(args)->input; -hpvm_request_tensor(result, 0); - -__visc__cleanup(); - computeAccuracy2(labels, 10000, result); -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_b.bin deleted file mode 100644 index 9db02dbd02d278f3ff0ca57a29069c6aa97e5de0..0000000000000000000000000000000000000000 
Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_w.bin deleted file mode 100644 index 7bded494d26ebe0ef8d5807bf0d72b93e996bd88..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_10_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_b.bin deleted file mode 100644 index c504961705fa627a5d79745fb199dd3ba657176a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_w.bin deleted file mode 100644 index a43074e10f04f0e1f84339f053fa9fa160afcfe0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_11_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_b.bin deleted file mode 100644 index f355f4cd6814c73adc88d91610792d28afc55f26..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_w.bin deleted file mode 100644 index e0a3ba8ec934f80a8e25079f6ba2df4a80ddb8dd..0000000000000000000000000000000000000000 Binary files 
a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_12_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_b.bin deleted file mode 100644 index 02b34402d6400d6ca28cb23e71149a47f003c6fe..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_w.bin deleted file mode 100644 index b9b2f0063091bd1cf4cce556947588e902acca1f..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_13_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_b.bin deleted file mode 100644 index a46ece4fa84149d5501331039ae48becebc94aa1..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_w.bin deleted file mode 100644 index a479061bc1cc7876846b9602fdf4058b31e539c5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_b.bin deleted file mode 100644 index 734a5cbf3ea3501205bdc024695afb0c716252d3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_b.bin +++ /dev/null @@ -1,2 
+0,0 @@ -ãèÏ;ÿCú=×_Q>-î½Z5Ì=&$¿í÷‡>È,…>sv¾è2¼.ãÏ<Ã4$>׆0>Û)¿vg§¾Çô¬¿‡·K=Žõ¼=9ó¾Ÿî²¼ée=¬P¡?•¿Ì¾º}Œ=½ ~>–·"¼bKƒ>«î—¾Hu >v$™¾Æj—?cß8>OÏñ=kÇ<>ÙYM>(¿jît>щ>Ä…&¾gH¿·g&=«¿ -k>`M¶¿ ?³0T>ò£>u‹>]ˆÁ½®‚¿m‚¿M—a>ù䑽2>ë>¯ñ ¿W[…>²>#z·>Õ®Ú;›J¸=žr=€Ã=£> \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_w.bin deleted file mode 100644 index c0d54721818458b65a9224583d51977afab9cdbd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_b.bin deleted file mode 100644 index bbf5b5fc5ae9990cdb155111975a1c6a26801012..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_w.bin deleted file mode 100644 index 98e8ac21b55eb35b133cde17db42e6b54e34c6b0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_3_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_b.bin deleted file mode 100644 index 8251640419305c37ef428b620ac6bea8c19b2462..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_w.bin deleted file mode 100644 index 
a69f63de1d1f83768635a2d6e062e2773d3c4035..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_4_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_b.bin deleted file mode 100644 index 188fded6c6130aa36748791dba917a55dc93b626..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_w.bin deleted file mode 100644 index af70bc4bf9b92e25c7dfeb3033d23eedf9b50552..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_5_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_b.bin deleted file mode 100644 index e2999812df22d44cf3e658d32986e6ccbabc9d4c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_b.bin +++ /dev/null @@ -1,5 +0,0 @@ -Qwü¾ ¶–>ó< ->¼çV¼ŠP5?“ãð=Á89½vzî<ó±=¢4½`ô?~§?#‰>5ξûë$>8éü¾‰“>)q¢>¹L…>ÐMé½çÐÂ=s ¯>v)$?ÂU¿V‹>s•¶=ñÄ=•W?ž”p>Yhß>b–»½ž€>A²Æ>|Yi>²–½Ò~?èát>¦3f¾?ë¾…~£>Ë…·>Ûgž¾¦_=¼ü<œ‰€>y,¿}«·>š?O= ->.Íô=”í¾ dP¾<>¶¤>?^÷½o) >Ä#?¹”s½ÎM~>‚?øáy>FÅT¾4E“=†¿§B½I€=Û=?öG?¸ÛÑ>N0Ç>=ü?ßÞ¥=lѼ½üÜT>¢îæ>S>M>YЊ=t[N¾ámÄ>Z¶Ò>¯^T>pxõ>‡Ú>¹ÿH¾î÷q>wO$¾ -¿û0/>Ø˹>ót$¿žÁ?º¨®=x½0w>1Z,¾€3ã>Äš×=¥b¸=è6@>ìŒÐ>!4š>ý´>¸D†>°<Œ=ˆh_=}>ÿ C>‘¿þ½=ã=uj>[W?d¨Ö>Õk±>â(½“Â`¼ML“>·£?iC;>p·?èçŠ>G™>²¾yrì>e6ƒ¾ægs>bø?§{(?l¡â»:U¡>\å?v'é=÷ðœ>+U>osÖ>õq'>Žçô>Üã&>àI¾[´Î=çE ?-áD½‹>ZWy>líœ>û¯>-®¾jA¾`R¿êþL¿1Ôº6¡h»Y„=sµ)?šà—>þÌ?6ܨ=ðîÜ>C>–š>Êÿ>ä¶Ò>n"Ø>6¥Á¾Lƒ½bþ1?8}½b›Ê>ÖÁ>`ú>÷ÂÊ>ß -G>Ðå>öÑ 
>ûøq>±Šÿ>UÙ$?Ò?Ïm¼{mb>_I$>'-Ö>+æ+>èd¹>¦@¿^´†>Z(¯¼Ãͽð¾i>ë h¿ Õ‹=’Ï>›s>,|>þ®>ïºS>Æ–>%>“?»Àå>¤¢Á½Û÷Ø>û‚œ;õ)ï=v6̾> >®£”<i°¾¬é>Žà×¼ÜüÏ>0œÌ>˜Lä>ʯ¾–“n>ບ>!û¯>€õ£>G>ÌÍÑ>i1>•â¼§8½ ε>AˆÖ>‰F<A?¯>Ìt>x=wô?ñl ?4®_>t^»>BáÒ>¾fÙ½`:˜½-N@?ñw›¾ð|÷<É'‘>ºJ¾|>⽩ë´>ß0b>áXß½£å'¾fMM>£Ix>¥&Ƚ3úƒ>â'È>þþ>8åÉ>Y|§>ûT½9º¾ \ No newline at end of file diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_w.bin deleted file mode 100644 index bc0f6bb98d293a7eca254d30b39b9962830e366b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_6_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_b.bin deleted file mode 100644 index 6e13d00189e9d484ac13b6e6ddd0a8fb811d15e5..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_w.bin deleted file mode 100644 index 52f165dac46686de7c74d0f13d58a7621eedde32..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_7_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_b.bin deleted file mode 100644 index 522044eaf02e5cff51a74cddc769c13bf1d750b7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_w.bin 
deleted file mode 100644 index 2c11e701d36747b6c045d5eca522bdb89eb7d1f8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_8_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_b.bin deleted file mode 100644 index 0569ef5fe88a2d24a43f73411e7509661233cdff..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_w.bin deleted file mode 100644 index bd374d936bc8977756ff5968fd32fc7e116ad560..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/conv2d_9_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_b.bin deleted file mode 100644 index dc4f5cf56c3308245ca8ffc63f2bde019be15e95..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_w.bin deleted file mode 100644 index af9de099702a966e0746d9f45bae02c4335eda70..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_1_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_b.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_b.bin deleted file mode 100644 index 
e5d561e947d0b8b42ecb039bee0d53076490337a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_b.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_w.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_w.bin deleted file mode 100644 index 351da0667b4391bd8cd3db0ce7bf4b8621931f0b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/dense_2_w.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/input.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/input.bin deleted file mode 100644 index 0492b77f00b91dbb0b321ec6c77c20737d388afa..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/input.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels.bin deleted file mode 100644 index 4d4041bc05c6037e7612f325104ae24da525c5be..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels32.bin b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels32.bin deleted file mode 100644 index 0e29aa05a086316e36429e20e1a13580e1b0c36a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/labels32.bin and /dev/null differ diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layer_composition.txt b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layer_composition.txt deleted file mode 100644 index 79818d6f010035c6e19f12881749f4d5b3d3c253..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layer_composition.txt +++ /dev/null @@ -1,15 +0,0 @@ -conv add activation -conv add activation pool -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -conv add activation -conv add activation -conv add activation pool -dense add activation -dense add diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layers.txt b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layers.txt deleted file mode 100644 index 7eaa520e4a2e451d5ccec5c8737dec8be8458369..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/layers.txt +++ /dev/null @@ -1,15 +0,0 @@ -Conv1,10000,3,32,32,64,3,3,3 -Conv2,10000,64,32,32,64,64,3,3 -Conv3,10000,64,16,16,128,64,3,3 -Conv4,10000,128,16,16,128,128,3,3 -Conv5,10000,128,8,8,256,128,3,3 -Conv6,10000,256,8,8,256,256,3,3 -Conv7,10000,256,8,8,256,256,3,3 -Conv8,10000,256,4,4,512,256,3,3 -Conv9,10000,512,4,4,512,512,3,3 -Conv10,10000,512,4,4,512,512,3,3 -Conv11,10000,512,2,2,512,512,3,3 -Conv12,10000,512,2,2,512,512,3,3 -Conv13,10000,512,2,2,512,512,3,3 -FC1,10000,512,512,512 -FC2,10000,512,512,100 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/promise_src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/promise_src.cc deleted file mode 100644 index 0f28f2bfd69d9e8c4895e782bd02173eefcd0993..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/promise_src.cc +++ /dev/null @@ -1,138 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - -int total_runs = 100; -for (int 
i = 0 ; i < total_runs; i++){ - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - - - -std::string dir_prefix = std::string("vgg16_cifar100_test/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = 
dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); -std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 8.190712, 9); -void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 8.190712, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.023172, 9); -void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 19.023172, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 14.428051, 9); -void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 14.428051, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 23.065294, 9); -void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 23.065294, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, 
-0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 15.165984, 9); -void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 15.165984, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 13.664733, 9); -void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 13.664733, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 19.025272, 9); -void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 19.025272, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 6.727217, 9); -void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 6.727217, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.2003012, 9); -void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 3.2003012, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.321189, 9); -void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 4.321189, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.936297, 9); -void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 2.936297, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 4.87262, 9); -void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.87262, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 10.32133, 9); -void* var_13 = FCLayer_PROMISE(var_12, 0.0, 10.32133, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 13.91, 9); -void* var_14 = FCLayer_PROMISE(var_13, 0.0, 13.91, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -70.45701, 87.34367, 9); -void* var_15 = tensorSoftmax(var_14); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_15); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = 
final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -} - -dumpExecutionAccuracies(); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt deleted file mode 100644 index 4e614e1664822d2ecf6fa426a7eb2fd7c362a2e7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt +++ /dev/null @@ -1,15 +0,0 @@ --1.7829767 1.9456929 -0.7450515 0.71249133 -1.5885142 0.275554 0.0 8.190712 -0.0 8.190712 -0.30790088 0.43504623 -1.4242363 1.2602744 0.0 19.023172 -0.0 19.023172 -0.29189092 0.26958522 -1.0527138 0.9075671 0.0 14.428051 -0.0 14.428051 -0.15521508 0.1829038 -0.845419 1.9358484 0.0 23.065294 -0.0 23.065294 -0.13149762 0.14811686 -0.7162557 1.0370971 0.0 15.165984 -0.0 15.165984 -0.06236292 0.08321518 -0.9067523 0.9922458 0.0 13.664733 -0.0 13.664733 -0.06471479 0.1024472 -0.15943134 0.7988499 0.0 19.025272 -0.0 19.025272 -0.06320205 0.08291938 -0.32540628 0.5203079 0.0 6.727217 -0.0 6.727217 -0.037707984 0.051601283 -0.25622904 0.11251946 0.0 3.2003012 -0.0 3.2003012 -0.056007143 0.09549151 -0.11591503 0.06267536 0.0 4.321189 -0.0 4.321189 -0.060094673 0.10868926 -0.105962686 0.09584572 0.0 2.936297 -0.0 2.936297 -0.034618977 0.05792674 -0.4237576 0.11035452 0.0 4.87262 -0.0 4.87262 -0.035480656 0.058295887 -0.21477045 0.14263579 0.0 10.32133 -0.0 10.32133 -0.08929961 0.11301676 -0.20798548 0.47405547 0.0 13.91 -0.0 13.91 -0.6627122 0.35539475 -1.0631907 0.9830786 -70.45701 87.34367 diff --git a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/src.cc b/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/src.cc deleted file mode 100644 index bb792eaf71e851a5bf9791362aa09991dbc8ef68..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/src.cc +++ /dev/null @@ -1,164 +0,0 @@ 
- -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("vgg16_cifar100_test/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = 
readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); 
-void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); -std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); -void* var_1 = tensorAdd(var_0, conv2d_1_b); -void* var_2 = tensorRelu(var_1); -void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); -void* var_5 = tensorAdd(var_4, conv2d_2_b); -void* var_6 = tensorRelu(var_5); -void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); -void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); -void* var_9 = tensorAdd(var_8, conv2d_3_b); -void* var_10 = tensorRelu(var_9); 
-void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); -void* var_13 = tensorAdd(var_12, conv2d_4_b); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); -void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); -void* var_17 = tensorAdd(var_16, conv2d_5_b); -void* var_18 = tensorRelu(var_17); -void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); -void* var_21 = tensorAdd(var_20, conv2d_6_b); -void* var_22 = tensorRelu(var_21); -void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); -void* var_25 = tensorAdd(var_24, conv2d_7_b); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); -void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); -void* var_29 = tensorAdd(var_28, conv2d_8_b); -void* var_30 = tensorRelu(var_29); -void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); -void* var_33 = tensorAdd(var_32, conv2d_9_b); -void* var_34 = tensorRelu(var_33); -void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); -void* var_37 = tensorAdd(var_36, conv2d_10_b); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); -void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); -void* var_41 = tensorAdd(var_40, conv2d_11_b); -void* var_42 = tensorRelu(var_41); -void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); -void* var_45 = tensorAdd(var_44, conv2d_12_b); -void* var_46 = tensorRelu(var_45); -void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); -void* var_49 = tensorAdd(var_48, conv2d_13_b); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); -void* var_54 = tensorGemmGPU(var_51, dense_1_w); -void* var_55 = tensorAdd(var_54, dense_1_b); -void* var_56 = tensorRelu(var_55); -void* var_58 = tensorGemmGPU(var_56, dense_2_w); -void* var_59 = tensorAdd(var_58, 
dense_2_b); -void* var_60 = tensorSoftmax(var_59); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_60); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py b/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py index 8d13a292372d81d491aedf21341c0e51859be723..11c3584a41e272527bc8141d9e9a9ed2d22ab51b 100644 --- a/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py +++ b/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py @@ -28,9 +28,8 @@ def createBaselineConfig(f_path, base_flag, num_layers): if __name__ == "__main__": - FP32_binary_paths = ["alexnet_cifar10", "alexnet2_cifar10", "resnet18_cifar10", "vgg16_cifar10", "vgg16_cifar100", "lenet_mnist", "mobilenet", "mobilenet_shallow"] - FP16_binary_paths = ["alexnet_half", "alexnet2_half", "resnet18_half", "vgg16_cifar10_half", "vgg16_cifar100_half", "lenet_half", "mobilenet_half", "mobilenet_shallow_half"] - PROMISE_binary_paths = ["alexnet_promise", "alexnet2_promise", "resnet18_promise", "vgg16_cifar10_promise", "vgg16_cifar100_promise", "mobilenet_promise", "mobilenet_shallow_promise"] + FP32_binary_paths = ["alexnet_cifar10_fp32", "alexnet2_cifar10_fp32", "resnet18_cifar10_fp32", "vgg16_cifar10_fp32", "vgg16_cifar100_fp32", "lenet_mnist_fp32", "mobilenet_cifar10_fp32"] + FP16_binary_paths = ["alexnet_cifar10_fp16", "alexnet2_cifar10_fp16", "resnet18_cifar10_fp16", "vgg16_cifar10_fp16", "vgg16_cifar100_fp16", "lenet_mnist_fp16", "mobilenet_cifar10_fp16"] fp32_results = {} for binary_path in FP32_binary_paths: @@ -46,15 +45,5 @@ if __name__ == "__main__": fp16_results[binary_path] = accuracy - createBaselineConfig("promise_flags", 11, 1000) - promise_results = {} - for binary_path in PROMISE_binary_paths: - subprocess.call("./" + binary_path) 
- accuracy = readAccuracy("final_accuracy") - promise_results[binary_path] = accuracy - - printResults(fp32_results) printResults(fp16_results) - printResults(promise_results) - diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_knobs_utils.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_knobs_utils.cc index c5d79020acbaa6d7588577934dc222f679050ecf..b272bbcab45573f03ac17305f86a99e630db2950 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_knobs_utils.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_knobs_utils.cc @@ -23,16 +23,21 @@ PerfParams::PerfParams(int row1, int col1, int skip_offset1) { } PerfParamSet::PerfParamSet() { + printf("- knobs_file_path = %s \n", GLOBAL_KNOBS_FILE); std::ifstream file(GLOBAL_KNOBS_FILE); + if (!file){ + ERROR(" Could NOT find global_knobs.txt \n"); + } + std::string line; std::string partial; std::vector<std::string> tokens; while (std::getline(file, line)) { // Read each line - // printf ("***** line === %s ", line); + //printf ("***** line === %s ", line); std::istringstream iss(line); std::string token; while (std::getline(iss, token, '\t')) { // Read each token in the line @@ -59,8 +64,8 @@ PerfParamSet::PerfParamSet() { std::getline(token_stream, tok, ','); int offset = atoi(tok.c_str()); - printf("**** knob = %d, row = %d, col = %d, offset = %d \n\n", knob, - row, col, offset); + //printf("**** knob = %d, row = %d, col = %d, offset = %d \n\n", knob, + // row, col, offset); PerfParams params(row, col, offset); perf_knob_map[knob] = params; } @@ -92,9 +97,14 @@ SampParams::SampParams(int skip_rate1, int skip_offset1, } SampParamSet::SampParamSet() { + printf("- knobs_file_path = %s \n", GLOBAL_KNOBS_FILE); std::ifstream file(GLOBAL_KNOBS_FILE); + if (!file){ + ERROR("Could NOT find global_knobs.txt \n"); + } + std::string line; std::string partial; std::vector<std::string> tokens; @@ -114,7 +124,7 @@ SampParamSet::SampParamSet() { int index2 = token.find(","); 
std::string knob_str = token.substr(index2 + 1); int knob = atoi(knob_str.c_str()); - printf("knob = %d \n", knob); + //printf("knob = %d \n", knob); std::getline(iss, token, '\t'); std::istringstream token_stream(token); @@ -130,7 +140,7 @@ SampParamSet::SampParamSet() { std::getline(token_stream, tok, ','); float interpolation_id = atof(tok.c_str()); - printf("skip_every = %d, offset = %d \n", skip_every, offset); + //printf("skip_every = %d, offset = %d \n", skip_every, offset); SampParams params(skip_every, offset, interpolation_id); samp_knob_map[knob] = params; } diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu index 5051b780894b6e758cf768e663739ea6b92c71e5..9a3c9ca848d443a20f1dcbb98fb3eda52ee15945 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu @@ -1,9 +1,17 @@ +//===--------------------------- approxs_simulator.cu ---------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file consists of the simulation of implementation of software +// approximations for tensor convolutions. The approximations implemented are +// feature sampling and perforation for FP32 and FP16 compute precisions. +// +//===----------------------------------------------------------------------===// + #ifndef SIM_HEADER #define SIM_HEADER - - #include "tensor_runtime.h" #include "tensor_utils.h" #include "debug.h" @@ -29,8 +37,6 @@ #include <cassert> - - //N is new_data's size //n, c, h, w are the dimensions of new_data __global__ @@ -925,7 +931,7 @@ int getSwing(int swing){ void initializeAutotuner(){ - printf("initializing tuner .... \n"); + DEBUG("initializing tuner .... 
\n"); sampParamSet = new SampParamSet; perfParamSet = new PerfParamSet; diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu similarity index 90% rename from hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu rename to hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu index 8f2d840362ee523a458339b848e9080a2822d92f..1b770736bab93dd6a47cb4351dd0ad054e8eb14d 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu @@ -1,3 +1,13 @@ +//===--------------------------- approxtechniques.cu ---------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file consists of the custom implementation of software approximations +// for tensor convolutions. The approximations implemented are feature sampling +// and perforation for FP32 and FP16 compute precisions. +// +//===----------------------------------------------------------------------===// + #include "tensor_utils.h" #include "approx_utils.h" @@ -159,19 +169,13 @@ __global__ void convToGemmHalfInputNewIrregular(__half * const __restrict__ outp if(n < N) { //is thread id within bounds? 
for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { - //const int ki = c * KH * KW + i; - //const int kj = c * KH * KW + j; + const int filter_elem_num = (c * KH + i) * KW + j; //index of this filter element if((filter_elem_num - skip_offset) % skip_every) { const int condition = (filter_elem_num < skip_offset); const int output_col = condition * filter_elem_num + (!condition) * (filter_elem_num - ((filter_elem_num + 1 - skip_offset) / skip_every) - - ((filter_elem_num + 1 - skip_offset) % skip_every > 0)); - //if(filter_elem_num % skip_every != skip_offset) { - // int output_col = filter_elem_num - - // (filter_elem_num/skip_every + (filter_elem_num % skip_every > skip_offset)); - //if(skip_every == 1) - // output_col = filter_elem_num; + - ((filter_elem_num + 1 - skip_offset) % skip_every > 0)); const int out_index = ((n * reduced_filter_elem + output_col) * H_out + h) * W_out + w; //((output_col*N + n) * H_out + h) * W_out + w; if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) @@ -203,22 +207,16 @@ __global__ void convToGemmHalfInputNewIrregular2(__half * const __restrict__ out if(n < N) { //is thread id within bounds? 
for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { - //const int ki = c * KH * KW + i; - //const int kj = c * KH * KW + j; - const int filter_elem_num = (c * KH + i) * KW + j; //index of this filter element + + const int filter_elem_num = (c * KH + i) * KW + j; //index of this filter element if((filter_elem_num - skip_offset) % skip_every) { const int condition = (filter_elem_num < skip_offset); const int output_col = condition * filter_elem_num + (!condition) * (filter_elem_num - ((filter_elem_num + 1 - skip_offset) / skip_every) - ((filter_elem_num + 1 - skip_offset) % skip_every > 0)); - //if(filter_elem_num % skip_every != skip_offset) { - // int output_col = filter_elem_num - - // (filter_elem_num/skip_every + (filter_elem_num % skip_every > skip_offset)); - //if(skip_every == 1) - // output_col = filter_elem_num; + const int out_index = ((output_col * N + n) * H_out + h) * W_out + w; - //((n * reduced_filter_elem + output_col) * H_out + h) * W_out + w; - //((output_col*N + n) * H_out + h) * W_out + w + if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) output[out_index] = input[((n * C + c) * H + (inH + i)) * W + (inW + j)]; else @@ -278,15 +276,13 @@ __global__ void convToGemmPerfRow(float * const __restrict__ output, } const int inH = h_index * V_stride - V_pad; const int inW = w * H_stride - H_pad; //input width index (col number) - //#pragma unroll - //for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; + for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { const int filter_elem_num = c * KH * KW + i* KW + j; //index of this filter element - const int out_index = ((n * C * KH * KW + filter_elem_num) * H_eff + h) * W_out + w; - if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) + const int out_index = ((n * C * KH * KW + filter_elem_num) * H_eff + h) * W_out + w; + + if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) output[out_index] = input[((n * C + c) * H + (inH + i)) * 
W + (inW + j)]; else output[out_index] = 0; @@ -347,11 +343,7 @@ __global__ void convToGemmPerfCol(float * const __restrict__ output, } const int inW = w_index * H_stride - H_pad; const int inH = h * V_stride - V_pad; //input height index (row number) - //#pragma unroll - //for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; - + for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter element @@ -417,11 +409,8 @@ __global__ void convToGemmPerfRowHalf(__half * const __restrict__ output, } const int inH = h_index * V_stride - V_pad; const int inW = w * H_stride - H_pad; //input width index (col number) - // #pragma unroll - //for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; - + + for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter element @@ -455,38 +444,31 @@ __global__ void convToGemmPerfRowHalf2(__half * const __restrict__ output, } const int inH = h_index * V_stride - V_pad; const int inW = w * H_stride - H_pad; //input width index (col number) - // #pragma unroll - //for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; - for(int i = 0; i < KH; i++) { - for(int j = 0; j < KW; j++) { - const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter element - const int out_index = ((filter_elem_num * N + n) * H_eff + h) * W_out + w; - //((n * C * KH * KW + filter_elem_num) * H_eff + h) * W_out + w; - if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) - output[out_index] = input[((n * C + c) * H + (inH + i)) * W + (inW + j)]; - else - output[out_index] = 0; - } - } + + + for(int i = 0; i < KH; i++) { + for(int j = 0; j < KW; j++) { + const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter element + const int out_index = ((filter_elem_num * N + n) * H_eff + h) * 
W_out + w; + + if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) + output[out_index] = input[((n * C + c) * H + (inH + i)) * W + (inW + j)]; + else + output[out_index] = 0; + + } + } + } } __global__ void approxInterpolateRowHalf(int N, int old_h, int j, int c, int h, int w, __half *old_data, __half *new_data, int x, int start) { - //const int index = blockDim.x * blockIdx.x + threadIdx.x; //thread id - //const int n = tx / (c * h * w); //output image number - //const int stride = blockDim.x * gridDim.x; + const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id const int n = tx / (c * h * w); //output image number if(n < N) { - //for(int i = index; i < N; i += stride){ - //const int col = ((i % (c * h * w)) % (h * w)) % w; - //const int row = ((i % (c * h * w)) % (h * w)) / w; - //const int ch = (i % (c * h * w)) / (h * w); - // const int n = i / (c * h * w); const int ch = tx % (c * h * w) / (h * w); //filter number const int row = tx % (h * w) / w; //output height index (row number) @@ -517,17 +499,9 @@ __global__ void approxInterpolateRowHalf(int N, int old_h, int j, int c, int h, __global__ void approxInterpolateRowHalf2(int N, int old_h, int b, int c, int h, int w, __half *old_data, __half *new_data, int x, int start) { - //const int index = blockDim.x * blockIdx.x + threadIdx.x; //thread id - //const int n = tx / (c * h * w); //output image numbe - //const int stride = blockDim.x * gridDim.x; const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id const int n = tx / (c * h * w); //output image number if(n < N) { - //for(int i = index; i < N; i += stride){ - //const int col = ((i % (c * h * w)) % (h * w)) % w; - //const int row = ((i % (c * h * w)) % (h * w)) / w; - //const int ch = (i % (c * h * w)) / (h * w); - //const int n = i / (c * h * w); const int ch = tx % (c * h * w) / (h * w); //filter number const int row = tx % (h * w) / w; //output height index (row number) @@ -544,13 +518,11 @@ __global__ void 
approxInterpolateRowHalf2(int N, int old_h, int b, int c, int h, } else if((row - start) % x == 0) { const int row_index = row - ((row + 1 - start) / x); const int output_index = ch * (b * old_h * w) + n * (old_h * w) + row_index * (w) + col; - //n * (c * old_h * w) + ch * (old_h * w) + row_index * (w) + col; new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = __hdiv(__hadd(old_data[output_index], old_data[output_index - w]), 2); } else { const int row_index = row - ((row + 1 - start) / x) - ((row + 1 - start) % x > 0); const int output_index = ch * (b * old_h * w) + n * (old_h * w) + row_index * (w) + col; - //n * (c * old_h * w) + ch * (old_h * w) + row_index * (w) + col; new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[output_index]; } } @@ -577,11 +549,7 @@ __global__ void convToGemmPerfColHalf(__half * const __restrict__ output, } const int inW = w_index * H_stride - H_pad; const int inH = h * V_stride - V_pad; //input height index (row number) - //#pragma unroll - // for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; - + for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter element @@ -616,10 +584,8 @@ __global__ void convToGemmPerfColHalf2(__half * const __restrict__ output, } const int inW = w_index * H_stride - H_pad; const int inH = h * V_stride - V_pad; //input height index (row number) - //#pragma unroll - // for (int ki = 0; ki < KH * KW; ki++) { - // int i = ki / KW; - // int j = ki % KW; + + for(int i = 0; i < KH; i++) { for(int j = 0; j < KW; j++) { const int filter_elem_num = c * KH * KW + i * KW + j; //index of this filter elemen @@ -637,15 +603,6 @@ __global__ void convToGemmPerfColHalf2(__half * const __restrict__ output, __global__ void approxInterpolateColHalf(int N, int old_w, int b, int c, int h, int w, __half *old_data, __half *new_data, int x, int start) { - //const int index = blockDim.x * 
blockIdx.x + threadIdx.x; //thread id - //const int stride = blockDim.x * gridDim.x; - - //for(int i = index; i < N; i += stride){ - // const int col = ((i % (c * h * w)) % (h * w)) % w; - // const int row = ((i % (c * h * w)) % (h * w)) / w; - // const int ch = (i % (c * h * w)) / (h * w); - // const int n = i / (c * h * w); - const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id const int n = tx / (c * h * w); //output image number if(n < N) { @@ -678,14 +635,6 @@ __global__ void approxInterpolateColHalf(int N, int old_w, int b, int c, int h, __global__ void approxInterpolateColHalf2(int N, int old_w, int b, int c, int h, int w, __half *old_data, __half *new_data, int x, int start) { - //const int index = blockDim.x * blockIdx.x + threadIdx.x; //thread id - //const int stride = blockDim.x * gridDim.x; - - // for(int i = index; i < N; i += stride){ - // const int col = ((i % (c * h * w)) % (h * w)) % w; - // const int row = ((i % (c * h * w)) % (h * w)) / w; - // const int ch = (i % (c * h * w)) / (h * w); - // const int n = i / (c * h * w); const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id const int n = tx / (c * h * w); //output image number if(n < N) { @@ -695,25 +644,23 @@ __global__ void approxInterpolateColHalf2(int N, int old_w, int b, int c, int h, if(col < start) { new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[ch * (b * h * old_w) + n * (h * old_w) + row * old_w + col]; - //n * (c * h * old_w) + ch * (h * old_w) + row * old_w + col]; + } else if(col == w - 1) { new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[ch * (b * h * old_w) + n * (h * old_w) + row * (old_w) + old_w - 1]; - //n * (c * h * old_w) + ch * (h * old_w) + row * (old_w) + old_w - 1]; + } else if (col == 0) { new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[ch * (b * h * old_w) + n * (h * old_w) + row * (old_w)]; - //n * (c * h * old_w) + ch * (h * old_w) + row * (old_w)]; + } else if((col - start) 
% x == 0) { const int col_index = col - ((col + 1 - start) / x); const int output_index = ch * (b * h * old_w) + n * (h * old_w) + row * old_w + col_index; - //n * (c * h * old_w) + ch * (h * old_w) + row * old_w + col_index; new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = __hdiv(__hadd(old_data[output_index], old_data[output_index - 1]), 2); } else { const int col_index = col - ((col + 1 - start) / x) - ((col + 1 - start) % x > 0); const int output_index = ch * (b * h * old_w) + n * (h * old_w) + row * old_w + col_index; - //const int output_index = n * (c * h * old_w) + ch * (h * old_w) + row * old_w + col_index; new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[output_index]; } } @@ -749,6 +696,7 @@ __global__ void convToGemmFullInputRegular(float * const __restrict__ output, in_index = ((fi - offset + 1) * skip_every) / (skip_every - 1) + (((fi - offset + 1) * skip_every) % (skip_every - 1) > 0) + offset - 1; } + const int i = (in_index % (KW * KH)) / KW; const int j = in_index % KW; const int out_index = ((n * reduced_filter_elem + fi) * H_out + h) * W_out + w; @@ -799,13 +747,15 @@ __global__ void convToGemmFullInputIrregular(float * const __restrict__ output, } } } + + } __global__ void createReducedFiltersFullRegular(float * output, - const float * const __restrict input, const int NF, - const int num_filter_elem, const int reduced_filter_elem, - const int channels, - const int skip_every, const int skip_offset, const float fac) { + const float * const __restrict input, const int NF, + const int num_filter_elem, const int reduced_filter_elem, + const int channels, + const int skip_every, const int skip_offset, const float fac) { const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id const int fIdx = tx / reduced_filter_elem; //filter index @@ -816,11 +766,13 @@ __global__ void createReducedFiltersFullRegular(float * output, int in_index; if(offset < channel_offset) { in_index = offset; - } else { + } + else { 
in_index = ((offset - channel_offset + 1) * skip_every) / (skip_every - 1) + (((offset - channel_offset + 1) * skip_every) % (skip_every - 1) > 0) + channel_offset -1; - } - output[fIdx * reduced_filter_elem + offset] = fac * input[num_filter_elem * fIdx + in_index]; + } + + output[fIdx * reduced_filter_elem + offset] = fac * input[num_filter_elem * fIdx + in_index]; } } @@ -863,30 +815,23 @@ __global__ void convToGemmHalfInputRegular(__half * const __restrict__ output, const int inH = h * V_stride - V_pad; //input height index (row number) const int inW = w * H_stride - H_pad; //input width index (col number) - #pragma unroll - //for(int fi = 0; fi < reduced_filter_elem; fi++) { - //const int ch = (fi * C) / reduced_filter_elem; + #pragma unroll for(int ki = 0; ki < reduced_filter_elem / C; ki++) { - const int fi = ch * (reduced_filter_elem / C) + ki; - const int offset = (skip_offset + ch) % skip_every; - //int in_index; + const int fi = ch * (reduced_filter_elem / C) + ki; + const int offset = (skip_offset + ch) % skip_every; + const bool condition = (fi < offset); const int in_index = condition * fi + (!condition) * (((fi - offset + 1) * skip_every) / (skip_every - 1) + (((fi - offset + 1) * skip_every) % (skip_every - 1) > 0) + offset - 1); - //if(fi < offset) { - // in_index = fi; - //} else { - // in_index = ((fi - offset + 1) * skip_every) / (skip_every - 1) - // + (((fi - offset + 1) * skip_every) % (skip_every - 1) > 0) + offset - 1; - // } - const int i = (in_index % (KW * KH)) / KW; - const int j = in_index % KW; - const int out_index = ((n * reduced_filter_elem + fi) * H_out + h) * W_out + w; - if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) { - output[out_index] = input[((n * C + ch) * H + (inH + i)) * W + (inW + j)]; - } else { + + const int i = (in_index % (KW * KH)) / KW; + const int j = in_index % KW; + const int out_index = ((n * reduced_filter_elem + fi) * H_out + h) * W_out + w; + if(inH + i >= 0 && inH + i < H && inW + j >= 0 && 
inW + j < W) { + output[out_index] = input[((n * C + ch) * H + (inH + i)) * W + (inW + j)]; + } else { output[out_index] = 0; - } + } } } } @@ -912,26 +857,20 @@ __global__ void convToGemmHalfInputRegular2(__half * const __restrict__ output, #pragma unroll for(int ki = 0; ki < reduced_filter_elem / C; ki++) { - const int fi = ch * (reduced_filter_elem / C) + ki; - //for(int fi = 0; fi < reduced_filter_elem; fi++) { - // const int ch = (fi * C) / reduced_filter_elem; + + const int fi = ch * (reduced_filter_elem / C) + ki; const int offset = (skip_offset + ch) % skip_every; const int condition = (fi < offset); - const int in_index = condition * fi + (! condition) * (((fi - offset + 1) * skip_every) / (skip_every - 1) + const int in_index = condition * fi + (! condition) * (((fi - offset + 1) * skip_every) / (skip_every - 1) + (((fi - offset + 1) * skip_every) % (skip_every - 1) > 0) + offset - 1); - // int in_index; - //if(fi < offset) { - // in_index = fi; - //} else { - // in_index = ((fi - offset + 1) * skip_every) / (skip_every - 1) - // + (((fi - offset + 1) * skip_every) % (skip_every - 1) > 0) + offset - 1; - // } + const int i = (in_index % (KW * KH)) / KW; const int j = in_index % KW; const int out_index = ((fi * N + n) * H_out + h) * W_out + w; if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) { output[out_index] = input[((n * C + ch) * H + (inH + i)) * W + (inW + j)]; - } else { + } + else { output[out_index] = 0; } } @@ -961,20 +900,15 @@ __global__ void convToGemmHalfInputIrregular(__half * const __restrict__ output, const int condition = (fi < skip_offset); const int in_index = condition * fi + (! 
condition) * (((fi - skip_offset + 1) * skip_every) / (skip_every - 1) + (((fi - skip_offset + 1) * skip_every) % (skip_every - 1) > 0) + skip_offset - 1); - //int in_index; - //if(fi < skip_offset) { - // in_index = fi; - //} else { - // in_index = ((fi - skip_offset + 1) * skip_every) / (skip_every - 1) - // + (((fi - skip_offset + 1) * skip_every) % (skip_every - 1) > 0) + skip_offset - 1; - // } - const int ch = in_index / (KW * KH); + + const int ch = in_index / (KW * KH); const int i = (in_index % (KW * KH)) / KW; const int j = in_index % KW; const int out_index = ((n * reduced_filter_elem + fi) * H_out + h) * W_out + w; if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) { output[out_index] = input[((n * C + ch) * H + (inH + i)) * W + (inW + j)]; - } else { + } + else { output[out_index] = 0; } } @@ -1003,18 +937,11 @@ __global__ void convToGemmHalfInputIrregular2(__half * const __restrict__ output const int condition = (fi < skip_offset); const int in_index = condition * fi + (!condition) * (((fi - skip_offset + 1) * skip_every) / (skip_every - 1) + (((fi - skip_offset + 1) * skip_every) % (skip_every - 1) > 0) + skip_offset - 1); - // int in_index; - // if(fi < skip_offset) { - // in_index = fi; - // } else { - // in_index = ((fi - skip_offset + 1) * skip_every) / (skip_every - 1) - // + (((fi - skip_offset + 1) * skip_every) % (skip_every - 1) > 0) + skip_offset - 1; - // } + const int ch = in_index / (KW * KH); const int i = (in_index % (KW * KH)) / KW; const int j = in_index % KW; const int out_index = ((fi * N + n) * H_out + h) * W_out + w; - //const int out_index = ((n * reduced_filter_elem + fi) * H_out + h) * W_out + w; if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) { output[out_index] = input[((n * C + ch) * H + (inH + i)) * W + (inW + j)]; } else { @@ -1032,11 +959,8 @@ __global__ void createReducedFiltersHalfRegular(__half * output, const int skip_every, const int skip_offset, const float fac) { const int tx = 
blockDim.x * blockIdx.x + threadIdx.x; //thread id - //const int stride = blockDim.x * gridDim.x; - - //#pragma unroll - //for (int i = tx; i < NF; i += stride) { - const int fIdx = tx / reduced_filter_elem; //filter index + + const int fIdx = tx / reduced_filter_elem; //filter index if(fIdx < NF) { const int offset = tx % reduced_filter_elem; //offset within filter const int ch = (offset * channels) / reduced_filter_elem; @@ -1045,15 +969,9 @@ __global__ void createReducedFiltersHalfRegular(__half * output, const int in_index = condition * offset + (!condition) * (((offset - channel_offset + 1) * skip_every) / (skip_every - 1) + (((offset - channel_offset + 1) * skip_every) % (skip_every - 1) > 0) + channel_offset - 1); - // int in_index; - // if(offset < channel_offset) { - // in_index = offset; - //} else { - // in_index = ((offset - channel_offset + 1) * skip_every) / (skip_every - 1) - // + (((offset - channel_offset + 1) * skip_every) % (skip_every - 1) > 0) + channel_offset -1; - // } output[fIdx * reduced_filter_elem + offset] = __hmul(__float2half_rn(fac), input[num_filter_elem * fIdx + in_index]); } + } __global__ void createReducedFiltersHalfIrregular(__half * output, @@ -1061,21 +979,20 @@ __global__ void createReducedFiltersHalfIrregular(__half * output, const int num_filter_elem, const int reduced_filter_elem, const int skip_every, const int skip_offset, const float fac) { - const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id - //const int stride = blockDim.x * gridDim.x; - //#pragma unroll - //for (int i = tx; i < NF; i += stride) { + const int tx = blockDim.x * blockIdx.x + threadIdx.x; //thread id + const int fIdx = tx / reduced_filter_elem; //filter index - const int fIdx = tx / reduced_filter_elem; //filter index - if(fIdx < NF) { - const int offset = tx % reduced_filter_elem; //offset within filter - const int condition = (offset < skip_offset); - int in_index = condition * offset + (!condition) * (((offset - skip_offset + 1) * 
skip_every) / (skip_every - 1) + if(fIdx < NF) { + + const int offset = tx % reduced_filter_elem; //offset within filter + const int condition = (offset < skip_offset); + + int in_index = condition * offset + (!condition) * (((offset - skip_offset + 1) * skip_every) / (skip_every - 1) + (((offset - skip_offset + 1) * skip_every) % (skip_every - 1) > 0) + skip_offset - 1); - //} - output[fIdx * reduced_filter_elem + offset] = __hmul(__float2half_rn(fac), input[num_filter_elem * fIdx + in_index]); - //} + + output[fIdx * reduced_filter_elem + offset] = __hmul(__float2half_rn(fac), input[num_filter_elem * fIdx + in_index]); } + } @@ -1102,7 +1019,7 @@ __global__ void convToGemmApprox(float * const __restrict__ output, for(int j = 0; j < KW; j++) { const int filter_elem_num = (c * KH + i) * KW + j; //index of this filter element if(filter_elem_num % skip_every != skip_every-1) { //are we including this filter element? - const int output_col = filter_elem_num - (filter_elem_num/skip_every); //calculate output column, taking skipping into account + const int output_col = filter_elem_num - (filter_elem_num/skip_every); //cal output column, taking skipping into account if(inH + i >= 0 && inH + i < H && inW + j >= 0 && inW + j < W) output[((n * reduced_filter_elem + output_col) * H_out + h) * W_out + w] = input[((n * C + c) * H + (inH + i)) * W + (inW + j)]; else @@ -1120,8 +1037,6 @@ void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, int horizontal_stride, int conv_mode, int conv_groups, int row, int col, int start){ - //////INFO("*** TensorConvolution (output perforation) \n"); - //Event("Conv"); Tensor* input = (Tensor*)input_ptr; Tensor* filter = (Tensor*)filter_ptr; //FIXME: Current hack to preserve backward compatibilty @@ -1134,10 +1049,8 @@ void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, hostToDeviceCopy(input); hostToDeviceCopy(filter); - //Event("H2F_start"); convertToFP32(input); convertToFP32(filter); - //Event("H2F_end"); long int n, c, h, 
w; // output dimensions n = input->dims.dim_sizes[0]; @@ -1211,14 +1124,14 @@ void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, freeTensor(output); cudaFree(convData); - } else if(col > 1){ + } + else if(col > 1){ output = (Tensor*)create4DTensor((cudnnDataType_t) float_type, //input->data_type, CUDNN_TENSOR_NCHW, n, c, h, w_eff); // NOTE: Changing output tensor placement from host to device changeTensorPlacement(output, DEVICE); - // NOTE: Necessary to insert the above call for every output tensor - //total number of filter elem + const long int num_filter_elem = KH * KW * input->dims.dim_sizes[1]; float * convData; @@ -1540,7 +1453,8 @@ void* tensorConvApprox(void* input_ptr, void* filter_ptr, cudaFree(convData); cudaFree(reducedFilter); } else { - INFO("FP32 BASELINE\n"); + + //INFO("FP32 BASELINE\n"); Tensor *output = (Tensor*)create4DTensor((cudnnDataType_t) float_type, CUDNN_TENSOR_NCHW, n, c, h, w); changeTensorPlacement(output, DEVICE); @@ -1986,14 +1900,12 @@ void* tensorConvApproxHalf2(void* input_ptr, void* filter_ptr, freeTensor(output); cudaFree(convData); } -// INFO("CONV DONE\n"); + profileEvent("H2F_start"); convertToFP32_offline(new_output); - //convertToFP32(input); - //convertToFP32(filter); + profileEvent("H2F_end"); - //profileEvent("#Conv_end"); - //INFO("CONVOLUTION END\n"); + return new_output; } diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/debug.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/debug.cc index ebb7e73f2b5a019954e7390f3eb8fadc96a3719e..3e4aecb824a93b932ef2146380b86496f71b0f28 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/debug.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/debug.cc @@ -3,8 +3,9 @@ #ifndef RUNTIME_DEBUG #define RUNTIME_DEBUG -#define LOG_DEBUG 1 // Sets the debug logging to true +#define LOG_DEBUG 0 // Sets the debug logging to true #define LOG_INFO 1 // Sets the info logging to true +#define LOG_ERROR 1 // Print Errors #define ASSERT_FLAG // Sets 
assertions to true (opposite of NDEBUG macro) #include "debug.h" @@ -35,7 +36,7 @@ void DEBUG(const char *format, ...) { } void ERROR(const char *format, ...) { - if (!LOG_DEBUG) // Don't print if logging info is disabled + if (!LOG_ERROR) // Don't print if logging info is disabled return; va_list args; va_start(args, format); diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/error.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/error.cu index 6b8ee15a42106b2d6857065941324e50157763d5..7a700b435efe464153fbba7997662c7dfa970385 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/error.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/error.cu @@ -55,7 +55,7 @@ void readOpenTunerFlags(const char* file_name){ FILE* fp = fopen(file_name, "r"); if(fp == NULL){ - INFO("\nWARNING: File 'opentuner_flags' not found \n\n\n"); + DEBUG("\n WARNING: File 'opentuner_flags' not found \n\n\n"); return; } diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu index 1990967deff6bc85cc8c9fc666ab497fb6d77991..f24e8b58dbeb5a49e0eaf51cfac1f2d2f3148caa 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu @@ -49,8 +49,8 @@ void* tensorHalfGemm(void* lhs_ptr, void* rhs_ptr){ Tensor* lhs = (Tensor*) lhs_ptr; Tensor* rhs = (Tensor*) rhs_ptr; - INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); - INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); + DEBUG("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); + DEBUG("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); hostToDeviceCopy(lhs); hostToDeviceCopy(rhs); @@ -76,7 +76,7 @@ void* tensorHalfGemm(void* lhs_ptr, void* rhs_ptr){ int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; // Dimension-note: Check if k is same across the two tensors - INFO("m = %d, n = %d, k = %d \n", m, n, k); + DEBUG("m = %d, 
n = %d, k = %d \n", m, n, k); if(rhs_k != k){ ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); } @@ -115,14 +115,10 @@ void* tensorHalfGemm(void* lhs_ptr, void* rhs_ptr){ //h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data); - profileEvent("H2F_end"); - profileEvent("#tensorHalfGemm_end"); - - return output; } @@ -263,18 +259,14 @@ void* tensorHalfConvolution(void* input_ptr, void* filter_ptr, output->tensor_half_desc, output->gpu_half_data)); - profileEvent("H2F_start"); convertToFP32_offline(output); profileEvent("H2F_end"); - - profileEvent("#tensorHalfConv_end"); - return output; } diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc index 284a75c444f54a0f3aa3412c8cd177d4ebad4e2e..8b5c4aaf93db40c038c4a9a30569318ae00d6be1 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc @@ -31,14 +31,14 @@ void llvm_hpvm_initTensorRt(int gpuid) { if (!runtime_initialized) { - printf("INITIALIZING GPU %d \n", gpuid); + INFO("INITIALIZING GPU %d \n", gpuid); // NOTE: Setting the target GPU. Can we use multiple GPUs? 
checkCudaErrors(cudaSetDevice(gpuid)); // Initializing cuDNN and cuBlas handles checkCudaErrors(cublasCreate(&cublasHandle)); checkCUDNN(cudnnCreate(&cudnnHandle)); - printf("CREATED HANDLES %d \n", gpuid); + DEBUG("CREATED HANDLES %d \n", gpuid); #ifdef PROMISE_TUNER_ENABLED // readOpenTunerFlags("opentuner_flags"); @@ -46,7 +46,7 @@ void llvm_hpvm_initTensorRt(int gpuid) { readOpenTunerFlags("promise_flags"); initializeAutotuner(); - printf("Read PROMISE FLAGS %d \n", gpuid); + DEBUG("Read PROMISE FLAGS %d \n", gpuid); #endif @@ -57,7 +57,7 @@ void llvm_hpvm_initTensorRt(int gpuid) { runtime_initialized = true; } - printf("DONE INTIALIZING GPU %d \n", gpuid); + INFO("DONE INTIALIZING GPU %d \n\n", gpuid); } void llvm_hpvm_cleanupTensorRt() { diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc index 18ebcfe4ef7e532e4657303baef6ea585b402a18..8683cbb416428f4691a10d2d9cd57a7252421899 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc @@ -64,7 +64,7 @@ void profileEvent(const char *event_name, bool compare_previous = false) { std::chrono::duration<double, std::ratio<1>> current_time = time_reading - zero_time; - INFO("AbsoluteTime, Event = %s, Time = %f \n", event_name, + DEBUG("AbsoluteTime, Event = %s, Time = %f \n", event_name, current_time.count()); profile_data.append(event_name); profile_data.append(event_count); @@ -77,7 +77,7 @@ void profileEvent(const char *event_name, bool compare_previous = false) { profile_data.append("\t"); profile_data.append(std::to_string(duration_time.count())); - INFO("TimeDuration, Event = %s, Time = %f \n", event_name, + DEBUG("TimeDuration, Event = %s, Time = %f \n", event_name, duration_time.count()); } diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc 
index 898d92c18cb8ad0b2df7a6d0c9d905c9649c53c1..9250810a2010a235074c0d29b8fe8bd63650324c 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc @@ -1,8 +1,13 @@ -/* This file includes the API implementation of the HPVM tensor runtime built for CPU -** -** Author: Hashim Sharif -** Email: hsharif3@illinois.edu -*/ +//===--------------------------- tensor_runtime_cpu.cc --------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file consists of the custom implementation of non-approximated and +// approximated versions of tensor operations to execute on CPUs. The +// software approximations implemented for tensor convolutions are feature +// sampling and perforation for FP32 compute precisions only. +// +//===----------------------------------------------------------------------===// #include <algorithm> #include <cfloat> diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu index 5635107d09644b22afc54175848b6e44b9c83406..319936b482c455af2fcc0280adb15d7c126c088a 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -68,8 +68,8 @@ void* tensorAdd(void* x_ptr, void* bias_ptr){ convertToFP32(bias); - INFO("x->num_elems = %d \n", x->num_elems); - INFO("bias->num_elems = %d \n", bias->num_elems); + DEBUG("x->num_elems = %d \n", x->num_elems); + DEBUG("bias->num_elems = %d \n", bias->num_elems); if(cudnnHandle == NULL){ ERROR("cudnnHandle NOT initialized!! 
\n"); @@ -132,7 +132,7 @@ void* tensorConvolution(void* input_ptr, void* filter_ptr, convertToFP32(filter); - INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride); + DEBUG("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride); checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); @@ -363,8 +363,8 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ Tensor* rhs = (Tensor*) rhs_ptr; - INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); - INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); + DEBUG("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); + DEBUG("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); // FIXIT: Need to be more aware of the implications of alpha and beta float alpha = 1.0f, beta = 0.0f; @@ -382,7 +382,7 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; // Dimension-note: Check if k is same across the two tensors - INFO("m = %d, n = %d, k = %d \n", m, n, k); + DEBUG("m = %d, n = %d, k = %d \n", m, n, k); if(rhs_k != k){ ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); } @@ -450,7 +450,7 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ void* tensorRelu(void* input_ptr){ - INFO("*** TensorRelu \n"); + DEBUG("*** TensorRelu \n"); profileEvent("Relu"); Tensor* input = (Tensor*) input_ptr; @@ -700,7 +700,7 @@ void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){ for(unsigned int i = 0; i < num_splits; i++){ - INFO("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", + DEBUG("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); Tensor* split = (Tensor*) create4DTensor(tensor->data_type, tensor->data_format, @@ -708,7 +708,7 @@ void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){ size_t copy_start = i * copy_size; size_t copy_stride = num_splits * 
copy_size; - INFO("copy_size = %d, copy_start = %d, copy_stride = %d, tensor->size_in_bytes = %d \n", + DEBUG("copy_size = %d, copy_start = %d, copy_stride = %d, tensor->size_in_bytes = %d \n", copy_size, copy_start, copy_stride, tensor->size_in_bytes); int index = 0; @@ -758,7 +758,7 @@ void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){ Tensor* output = (Tensor*) create4DTensor(tensors[0]->data_type, tensors[0]->data_format, dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); - INFO("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", + DEBUG("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); @@ -768,7 +768,7 @@ void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){ } size_t copy_stride = num_splits * copy_size; - INFO("copy_size = %d, num_copies = %d, copy_stride = %d, output->size_in_bytes = %d \n", + DEBUG("copy_size = %d, num_copies = %d, copy_stride = %d, output->size_in_bytes = %d \n", copy_size, num_copies, copy_stride, output->size_in_bytes); for(unsigned int i = 0; i < num_copies; i++){ @@ -804,7 +804,7 @@ void* tensorLRN(void* input_ptr, unsigned int LRN_window, cudnnLRNDescriptor_t LRNDesc; checkCUDNN(cudnnCreateLRNDescriptor(&LRNDesc)); - INFO("window = %d, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n", + DEBUG("window = %d, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n", LRN_window, LRN_alpha, LRN_beta, LRN_k); diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu index 6bbccfabaf22a395e91748be22e1eaddcf32c0ba..2bc62057b5c13161475b50b4a750da49146b97ce 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu @@ -220,7 +220,7 @@ void set4DTensorDescriptor(struct Tensor* tensor, int data_format, size_t dim1_s &size1, &size2, 
&size3, &size4, &nStride, &cStride, &hStride, &wStride); - INFO("nStride = %d, cStride = %d, hStride = %d, wStride = %d \n", + DEBUG("nStride = %d, cStride = %d, hStride = %d, wStride = %d \n", nStride, cStride, hStride, wStride); } @@ -238,16 +238,16 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, } for(int i = 0; i < num_dims; i++){ - INFO("strides[%d] = %d \n", i, strides[i]); + DEBUG("strides[%d] = %d \n", i, strides[i]); } int* const_dims = (int*) malloc(sizeof(int) * num_dims); for(int j = 0 ; j < num_dims; j++){ const_dims[j] = (int) dim_sizes[j]; - INFO("const_dim = %d \n", const_dims[j]); + DEBUG("const_dim = %d \n", const_dims[j]); } - INFO("data_type = %d, cuDNN_value = %d \n", tensor->data_type, CUDNN_DATA_FLOAT); + DEBUG("data_type = %d, cuDNN_value = %d \n", tensor->data_type, CUDNN_DATA_FLOAT); // For certain operations, the strides may need to change - in which case the descriptor // needs to be reinitialized checkCUDNN(cudnnSetTensorNdDescriptor(tensor->tensor_desc, @@ -340,7 +340,7 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, if(tensor->data_placement != DEVICE){ cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes, cudaMemcpyHostToDevice); - INFO("Moving %d bytes from host to GPU \n", tensor->size_in_bytes); + DEBUG("Moving %d bytes from host to GPU \n", tensor->size_in_bytes); tensor->data_placement = DEVICE; } else{ @@ -355,7 +355,7 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, if(tensor->data_placement != HOST){ cudaMemcpy(tensor->host_data, tensor->gpu_data, tensor->size_in_bytes, cudaMemcpyDeviceToHost); - INFO("Moving %d bytes from GPU to host \n", tensor->size_in_bytes); + DEBUG("Moving %d bytes from GPU to host \n", tensor->size_in_bytes); tensor->data_placement = HOST; } else{ @@ -375,13 +375,13 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, if(srcTensor->data_placement == HOST){ memcpy(dstTensor->host_data, srcTensor->host_data, 
srcTensor->size_in_bytes); - INFO("Moving %d bytes from host to host \n", srcTensor->size_in_bytes); + DEBUG("Moving %d bytes from host to host \n", srcTensor->size_in_bytes); dstTensor->data_placement = HOST; } else if (srcTensor->data_placement == DEVICE){ cudaMemcpy(dstTensor->gpu_data, srcTensor->gpu_data, srcTensor->size_in_bytes, cudaMemcpyDeviceToDevice); - INFO("Moving %d bytes from GPU to GPU \n", srcTensor->size_in_bytes); + DEBUG("Moving %d bytes from GPU to GPU \n", srcTensor->size_in_bytes); dstTensor->data_placement = DEVICE; } @@ -409,7 +409,7 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, if(tensor->data_placement != DEVICE){ cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes, cudaMemcpyHostToDevice); - INFO("Moving %d bytes from host to GPU \n", tensor->size_in_bytes); + DEBUG("Moving %d bytes from host to GPU \n", tensor->size_in_bytes); tensor->data_placement = DEVICE; } else{ @@ -426,7 +426,7 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, if(tensor == NULL) return; - printf("**** cur_type = %d , half_type = %d \n", tensor->cur_type, half_type); + //printf("**** cur_type = %d , half_type = %d \n", tensor->cur_type, half_type); if (ONLINE_PROFILING){ if (tensor->cur_type == half_type) diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu index 6759ab3b8eb340ff136238a6643c9e38a7621c7d..f9fee629e1192ee985064a5f968376d1381d9af9 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu @@ -145,7 +145,7 @@ extern "C"{ switch (activation_id) { case -1: { // No activation - INFO("No activation Function\n"); + //INFO("No activation Function\n"); activation_out = add_out; } break; @@ -259,6 +259,8 @@ extern "C"{ // NOTE: out_min, out_max are only relevant for ClippedRelu float out_min, float out_max){ + INFO ("*** 
Conv Layer \n"); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isPROMISENodeConfiguration()) { @@ -333,9 +335,9 @@ extern "C"{ GPUConf->getApproxChoices(); - printf("*** Convolution \n ApproxChoice = %d \n BatchNorm = %d \n CONV = %d \n", ApproxChoices[0].first, - GPUNodeConfiguration::TENSOR_OP::BATCHNORM, - GPUNodeConfiguration::TENSOR_OP::CONV); + //printf("*** Convolution \n ApproxChoice = %d \n BatchNorm = %d \n CONV = %d \n", ApproxChoices[0].first, + // GPUNodeConfiguration::TENSOR_OP::BATCHNORM, + // GPUNodeConfiguration::TENSOR_OP::CONV); // Check for convolution as first operation CUSTOM_ASSERT((ApproxChoices.size() >= 1) && @@ -363,7 +365,7 @@ extern "C"{ switch (activation_id) { case -1: { // No activation - INFO("No activation Function\n"); + //INFO("No activation Function\n"); activation_out = add_out; } break; @@ -411,13 +413,6 @@ extern "C"{ // If we remove the asserts, we can have all cases handled by a single call CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MAX) && "Expected POOL_MAX in provided Conv layer configuration"); - - /*pool_out = - handleTensorPoolingApproximationTuples(ApproxChoices.back().second, - activation_out, pool_id, - pool_size, pool_size, 0, 0, - pool_size, pool_size); - */ pool_out = handleTensorPoolingApproximationTuples(ApproxChoices.back().second, activation_out, pool_id, @@ -488,6 +483,8 @@ extern "C"{ // NOTE: out_min and out_max are only relevant for ClippedRelu float out_min, float out_max){ + INFO ("*** Dense Layer \n"); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isPROMISENodeConfiguration()) { @@ -573,7 +570,7 @@ extern "C"{ { // No activation CUSTOM_ASSERT((ApproxChoices.size() == 2) && "Incorrect number of operations in provided FC layer configuration"); - INFO("No activation Function\n"); + //INFO("No activation Function\n"); activation_out = add_out; } break; @@ -625,8 +622,9 @@ extern "C"{ 
void* wrapper_tensorRelu(const char* hpvm_node_id, void* input_ptr){ - // return tensorRelu(input_ptr); + INFO("*** Relu Operation \n"); + // Only mapped to GPU - get a GPU configuration GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); @@ -693,7 +691,8 @@ extern "C"{ void* wrapper_tensorBatchNorm(const char* hpvm_node_id, void* input_ptr, void* gamma_ptr, void* beta_ptr, void* mean_ptr, void* variance_ptr, double epsilon){ - + + INFO("*** BatchNorm Operation \n"); // Only mapped to GPU - get a GPU configuration GPUNodeConfiguration *GPUConf = @@ -704,11 +703,10 @@ extern "C"{ int> > > > &ApproxChoices = GPUConf->getApproxChoices(); - - printf("*** BatchNorm \n ApproxChoice = %d \n BatchNorm = %d \n CONV = %d \n", ApproxChoices[0].first, - GPUNodeConfiguration::TENSOR_OP::BATCHNORM, - GPUNodeConfiguration::TENSOR_OP::CONV); + // printf("*** BatchNorm \n ApproxChoice = %d \n BatchNorm = %d \n CONV = %d \n", ApproxChoices[0].first, + // GPUNodeConfiguration::TENSOR_OP::BATCHNORM, + // GPUNodeConfiguration::TENSOR_OP::CONV); // Approximation choices must be for a batchnorm operation CUSTOM_ASSERT(ApproxChoices.size() == 1 && @@ -723,8 +721,8 @@ extern "C"{ void* wrapper_tensorAdd(const char* hpvm_node_id, void* input_ptr, void* bias_ptr){ - // return tensorAdd(input_ptr, bias_ptr); + // Only mapped to GPU - get a GPU configuration GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); @@ -753,6 +751,8 @@ extern "C"{ int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride){ + INFO("*** TensorPooling Operation \n"); + // return tensorPooling(input_ptr, poolFunction, window_height, window_width, // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride); diff --git a/hpvm/scripts/llvm_installer.sh b/hpvm/scripts/llvm_installer.sh index 6867cf64f4d8cb7c28a43ed3c3b85e4dc1b403cf..21ed6ee6d13ef83e0cc62f643d8e674e7c0e5a90 100755 --- 
a/hpvm/scripts/llvm_installer.sh +++ b/hpvm/scripts/llvm_installer.sh @@ -28,18 +28,120 @@ LLVM_SRC="llvm-$VERSION.src" HPVM_RT=hpvm-rt/hpvm-rt.bc -read_yn "Build and install HPVM automatically?" AUTOMATE -echo -read -p "Number of threads: " NUM_THREADS +TARGET=all +TARGET_INPUT=all +FLAGGED=false +DOWNLOAD_WEIGHTS=false + +# Get flags +while getopts 'hmj:t:' opt; do + case $opt in + h) + echo + echo + echo "This is the help menu for HPVM installation" + echo + echo "There are 3 options for installation:" + echo + echo "-m is a manual installation flag. This will require you to install HPVM manually by running cmake and make manually." + echo "For more details, refer to README.md. Defaults to automatic installation." + echo + echo "-j is the threads flag. Accepts one argument: how many threads to build with." + echo "To build with 2 threads, enter -j2. Defaults to 2 threads." + echo + echo "-t is the build target flag. Accepts one argument: which build target(s) you would like to build to." + echo "For single target, enter -t ARM. For multiple targets, enter -t \"X86;ARM\"." + echo "Supports the following targets: AArch64, AMDGPU, ARM, BPF, Hexagon, Mips, MSP430, NVPTX, PowerPC, Sparc, SystemZ, X86, XCore." + echo "Defaults to targeting all supported architectures." + echo + echo "If no flags are provided, the script will use command line prompts for all options." + echo + exit + ;; + m) + AUTOMATE=false + FLAGGED=true + ;; + j) + if ! [[ $OPTARG =~ ^[0-9]+$ ]]; then + echo "Invalid argument for # of threads: $OPTARG" + exit -1; + else + NUM_THREADS=$OPTARG + FLAGGED=true + fi + ;; + t) + TARGET=$OPTARG + FLAGGED=true + ;; + esac +done + +if $FLAGGED; then + echo "Running with the following options:" + echo Automated: $AUTOMATE + echo Threads: $NUM_THREADS + echo Targets: $TARGET + echo Download Weights: $DOWNLOAD_WEIGHTS + echo +else + echo "No Flags found. Using command line prompts." + read -p "Build and install HPVM automatically? 
(y or n): " AUTOMATE_INPUT + + if [[ $AUTOMATE_INPUT == "" ]]; then + echo "No input given. Using default: $AUTOMATE" + elif [[ ! $AUTOMATE_INPUT == "y" ]] && [[ ! $AUTOMATE_INPUT == "n" ]]; then + echo "Invalid input. Using default: $AUTOMATE" + elif [[ $AUTOMATE_INPUT == "n" ]]; then + AUTOMATE=false + fi + -if [ ! $NUM_THREADS -gt 0 ]; then - NUM_THREADS = 2 echo - echo Using $NUM_THREADS threads by default. + read -p "Number of threads: " NUM_THREADS_INPUT + + if [[ $NUM_THREADS_INPUT == "" ]]; then + echo "No input given. Using default: $NUM_THREADS" + elif ! [[ $NUM_THREADS_INPUT =~ ^[0-9]+$ ]]; then + echo "Given input is not an integer. Using default: $NUM_THREADS" + elif [ ! $NUM_THREADS_INPUT -gt 0 ]; then + echo "Given input is not greater than 0. Using default: $NUM_THREADS" + else + NUM_THREADS=$NUM_THREADS_INPUT + fi + + echo + echo + echo "Supports the following options: AArch64, AMDGPU, ARM, BPF, Hexagon, Mips, MSP430, NVPTX, PowerPC, Sparc, SystemZ, X86, XCore." + echo "If building for multiple targets, separate options with semicolon:" + echo "e.g. X86;ARM" + read -p "Build target: " TARGET_INPUT + if [[ $TARGET_INPUT == "" ]]; then + echo "No input given. Using default: $TARGET" + else + TARGET=$TARGET_INPUT + fi + echo + + read_yn "Download weights necessary to run DNN benchmarks?" LOAD_WEIGHTS + if [[ $LOAD_WEIGHTS == "" ]]; then + echo "No input given. Weights will not be downloaded." + elif [[ $LOAD_WEIGHTS == "n" ]]; then + echo "Weights will not be downloaded." + else + DOWNLOAD_WEIGHTS=$LOAD_WEIGHTS + fi echo -fi + echo "Running with the following options:" + echo Automated: $AUTOMATE + echo Threads: $NUM_THREADS + echo Targets: $TARGET + echo Download Weights: $DOWNLOAD_WEIGHTS + echo +fi if [ -d $LLVM_SRC ]; then echo Found $LLVM_SRC, not dowloading it again! @@ -113,9 +215,16 @@ cd $CURRENT_DIR/llvm_patches echo Patches applied. -if [ ! $AUTOMATE == "y" ]; then +if ! $AUTOMATE ; then echo - echo HPVM not installed. Exiting. 
+ echo "HPVM not installed." + echo "To complete installation, follow these instructions:" + echo " - Create and navigate to a folder \"./build\" " + echo " - Run \"cmake ../llvm [options]\". Find potential options in README.md." + echo " - Run \"make -j<number of threads>\" and then \"make install\"" + echo "For more details refer to README.md." + echo + echo "Exiting." exit fi @@ -136,15 +245,18 @@ if [ ! -d $INSTALL_DIR ]; then fi cd $BUILD_DIR -echo cmake ../$LLVM_SRC -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR -cmake ../$LLVM_SRC -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR +echo cmake ../$LLVM_SRC -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLLVM_TARGETS_TO_BUILD=$TARGET -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR +cmake ../$LLVM_SRC -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLLVM_TARGETS_TO_BUILD=$TARGET -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR echo make -j$NUM_THREADS make -j$NUM_THREADS #make install -read_yn "Download weights necessary to run DNN benchmarks?" DOWNLOAD_WEIGHTS if [ $DOWNLOAD_WEIGHTS == "y" ]; then + echo + echo "Downloading weights for DNN benchmarks..." + echo + # First get hands on gdown -- google drive downloader wget https://raw.githubusercontent.com/circulosmeos/gdown.pl/master/gdown.pl -O gdown.pl chmod +x ./gdown.pl diff --git a/hpvm/set_paths.sh b/hpvm/set_paths.sh new file mode 100644 index 0000000000000000000000000000000000000000..42d1be52949ebf780d9fd7836d0429aa970472a5 --- /dev/null +++ b/hpvm/set_paths.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# These paths can be modified by the HPVM user +CUDA_TOOLKIT_PATH=/software/cuda-9.1/ +CUDA_INCLUDE_PATH=$CUDA_TOOLKIT_PATH/include +CUDA_LIB_PATH=$CUDA_TOOLKIT_PATH/lib64/ + +echo "Setting environment paths..." 
+ +# Setting CUDA paths here +export CUDA_BIN_PATH=$CUDA_TOOLKIT_PATH +export CUDA_INCLUDE_PATH=$CUDA_INCLUDE_PATH +export CUDNN_PATH=$CUDA_LIB_PATH +export LIBRARY_PATH=$CUDA_LIB_PATH:$LIBRARY_PATH +export LD_LIBRARY_PATH=$CUDA_LIB_PATH:$LD_LIBRARY_PATH + +echo "Finished setting environment paths!" diff --git a/hpvm/test/dnn_benchmarks/CMakeLists.txt b/hpvm/test/dnn_benchmarks/CMakeLists.txt index 536a85f1a05ddd460975416de44a35e598974766..887b2d1e6c3003cf886a907bcaf51c830dd0e423 100644 --- a/hpvm/test/dnn_benchmarks/CMakeLists.txt +++ b/hpvm/test/dnn_benchmarks/CMakeLists.txt @@ -48,63 +48,81 @@ set(HPVM_RT_PATH ${PROJECT_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.ll) # Compile flags (clang++) set(CLANG_FLAGS -fno-exceptions -std=c++11 -O3) -# Passes flags +# All compilation uses HPVM_DEFAULT_PASSES. set( - HPVM_OPT_PASSES - -load LLVMBuildDFG.so - -load LLVMInPlaceDFGAnalysis.so - -load LLVMDFG2LLVM_CUDNN.so - -load LLVMDFG2LLVM_CPU.so - -load LLVMClearDFG.so - -inplace -dfg2llvm-cudnn -dfg2llvm-cpu -clearDFG + HPVM_DEFAULT_PASSES + LLVMBuildDFG + LLVMInPlaceDFGAnalysis + LLVMDFG2LLVM_CPU + LLVMFuseHPVMTensorNodes + LLVMClearDFG + LLVMGenHPVM ) -# Manually specify dependencies because we're not using cmake "normally" -list( - APPEND DEPEND - clang opt llvm-link # LLVM binaries - hpvm-rt.ll # HPVM runtime - LLVMGenHPVM LLVMBuildDFG LLVMInPlaceDFGAnalysis LLVMDFG2LLVM_CUDNN LLVMDFG2LLVM_CPU LLVMClearDFG # Passes -) +set(WORK_DIR ${CMAKE_CURRENT_BINARY_DIR}) +set(test_compile_targets "") +function(compile_single_benchmark target src_file extra_passes extra_dfg_flags) + foreach(pass ${HPVM_DEFAULT_PASSES} ${extra_passes}) + list(APPEND LOAD_FILE_FLAGS "-load" "${pass}.so") + endforeach() + set( + HPVM_PASSES ${LOAD_FILE_FLAGS} + -buildDFG -inplace -hpvm-fuse ${extra_dfg_flags} -dfg2llvm-cpu -clearDFG + ) -file(GLOB entries ./benchmarks/*) -set(test_targets "") -foreach(entry ${entries}) - if(IS_DIRECTORY ${entry}) - file(GLOB src_files ${entry}/*.cpp) - 
foreach(src_file ${src_files}) - get_filename_component(target "${src_file}" NAME_WE) - set(target "test_${target}") - list(APPEND test_targets ${target}) + add_custom_command( + OUTPUT "${WORK_DIR}/${target}.ll" DEPENDS ${src_file} clang + COMMAND ${CMAKE_CXX_COMPILER} ${INCLUDE_COMPILER_STRINGS} ${CLANG_FLAGS} -emit-llvm -S ${src_file} + -o ${WORK_DIR}/${target}.ll + ) + add_custom_command( + OUTPUT "${WORK_DIR}/${target}.hpvm.ll" + DEPENDS "${WORK_DIR}/${target}.ll" opt LLVMGenHPVM + COMMAND ${LLVM_OPT} -load LLVMGenHPVM.so -genhpvm -globaldce -S ${WORK_DIR}/${target}.ll + -o ${WORK_DIR}/${target}.hpvm.ll + ) + add_custom_command( + OUTPUT "${WORK_DIR}/${target}.llvm.ll" + DEPENDS "${WORK_DIR}/${target}.hpvm.ll" opt ${HPVM_DEFAULT_PASSES} ${extra_passes} + COMMAND ${LLVM_OPT} ${HPVM_PASSES} -S ${WORK_DIR}/${target}.hpvm.ll + -o ${WORK_DIR}/${target}.llvm.ll + ) + add_custom_command( + OUTPUT "${WORK_DIR}/${target}.linked.bc" + DEPENDS "${WORK_DIR}/${target}.llvm.ll" hpvm-rt.ll llvm-link + COMMAND ${LLVM_LINK} ${WORK_DIR}/${target}.llvm.ll ${HPVM_RT_PATH} + -o ${WORK_DIR}/${target}.linked.bc + ) + add_custom_command( + OUTPUT "${WORK_DIR}/${target}" + DEPENDS "${WORK_DIR}/${target}.linked.bc" tensor_runtime gpu_profiler promise_profiler + COMMAND ${CMAKE_CXX_COMPILER} + ${WORK_DIR}/${target}.linked.bc + $<TARGET_FILE:tensor_runtime> $<TARGET_FILE:gpu_profiler> $<TARGET_FILE:promise_profiler> + -o ${WORK_DIR}/${target} ${LINKER_FLAGS} + ) + add_custom_target(${target} DEPENDS "${WORK_DIR}/${target}") - set(WORK_DIR ${CMAKE_CURRENT_BINARY_DIR}) - add_custom_command( - OUTPUT "${target}.ll" DEPENDS ${src_file} - COMMAND ${CMAKE_CXX_COMPILER} ${INCLUDE_COMPILER_STRINGS} ${CLANG_FLAGS} -emit-llvm -S ${src_file} - -o ${WORK_DIR}/${target}.ll - ) - add_custom_command( - OUTPUT - "${WORK_DIR}/${target}.hpvm.ll" - "${WORK_DIR}/${target}_cudnn.bc" - "${WORK_DIR}/${target}_cudnn_linked.bc" - "${WORK_DIR}/${target}_cudnn_linked" - DEPENDS "${target}.ll" - COMMAND ${LLVM_OPT} 
-load LLVMGenHPVM.so -genhpvm -globaldce -S ${WORK_DIR}/${target}.ll - -o ${WORK_DIR}/${target}.hpvm.ll - COMMAND ${LLVM_OPT} ${HPVM_OPT_PASSES} ${WORK_DIR}/${target}.hpvm.ll - -o ${WORK_DIR}/${target}_cudnn.bc - COMMAND ${LLVM_LINK} ${WORK_DIR}/${target}_cudnn.bc ${HPVM_RT_PATH} - -o ${WORK_DIR}/${target}_cudnn_linked.bc - COMMAND ${CMAKE_CXX_COMPILER} - ${WORK_DIR}/${target}_cudnn_linked.bc - $<TARGET_FILE:tensor_runtime> $<TARGET_FILE:gpu_profiler> $<TARGET_FILE:promise_profiler> - -o ${WORK_DIR}/${target} ${LINKER_FLAGS} - ) - add_custom_target(${target} DEPENDS "${WORK_DIR}/${target}_cudnn_linked") - add_dependencies(${target} ${DEPEND}) - endforeach() - endif() -endforeach(entry) -message(STATUS "List of test dnn benchmarks: ${test_targets}") + set(test_compile_targets ${test_compile_targets} ${target} PARENT_SCOPE) +endfunction(compile_single_benchmark) +file(GLOB entries ./benchmarks/*) +foreach(dir ${entries}) + get_filename_component(dirname "${dir}" NAME) + compile_single_benchmark( + "test_${dirname}" ${dir}/${dirname}.cpp LLVMDFG2LLVM_CUDNN -dfg2llvm-cudnn + ) + set( + loop_extra_flags + -dfg2llvm-wrapperapi + -quantization-levels-filename=${dir}/data/quant_ranges_rt.txt + -configuration-inputs-filename=${dir}/data/tuner_confs.txt + ) + compile_single_benchmark( + "test_${dirname}_loop" ${dir}/${dirname}_loop.cpp + LLVMDFG2LLVM_WrapperAPI "${loop_extra_flags}" + ) +endforeach(dir) +message(STATUS "List of test dnn benchmarks: ${test_compile_targets}") +add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets}) +message(STATUS "Target name for compiling all dnn benchmarks: dnn_benchmarks") diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp index 82d68baa6c436002c0a933b967bdb0dbf552c3d3..4dcd57c8164c8bd73280d6224c44bb8b9ec9d6f0 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp @@ -192,7 +192,7 
@@ void root(void *input, size_t input_bytes, void *conv2d_1_w, __hpvm__attributes(13, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, - conv2d_5_b, dense_1_w, dense_1_b, 0); + conv2d_5_b, dense_1_w, dense_1_b, 1, input); void *var_0 = __hpvm__createNodeND(0, var_0_node); @@ -366,10 +366,11 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; + std::string input_path = dir_prefix + std::string("input.bin"); void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t *labels = readLabels(labels_path.c_str(), 5000); + uint32_t *labels = readLabels3(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11); @@ -435,15 +436,14 @@ int main() { args->dense_1_w_bytes = 0; args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; - void *result = static_cast<RootIn *>(args)->input; hpvm_request_tensor(result, 0); __hpvm__cleanup(); - computeAccuracy2(labels, 5000, result); + computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp index e60efe728da794b6ba73fc02dbb92b8277d4de7e..86b3e7eb93bb6040af97007741853ef6474ddb3d 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, 
t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 5, 5, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1); @@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_10_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) { } void 
var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_16_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -363,11 +363,12 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("input.bin"); // void* input = readTrainedWeights(input_path.c_str(), 
0,5000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); uint8_t *labels = readLabels(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = @@ -461,7 +462,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e45529d84b54fc13f19e39f2da94538d54349aa --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt @@ -0,0 +1,7 @@ +1 -1.88164262419 2.09340954985 -0.33087718 0.3323643 -0.7782218 0.6020472 -0.978641152382 0.998945295811 +2 -0.978641152382 0.998945295811 -0.2095158 0.33543423 -0.45020863 0.30596754 -0.999703943729 0.999930202961 +3 -0.999703943729 0.999930202961 -0.1715614 0.17037082 -0.6519161 0.5939945 -0.999933600426 0.999940037727 +4 -0.999933600426 0.999940037727 -0.15575546 0.14456555 -0.55873865 0.4704539 -0.99999910593 0.999999344349 +5 -0.99999910593 0.999999344349 -0.16108225 0.16864482 -0.22135437 0.10401678 -0.999434411526 0.999634206295 +6 -0.999434411526 0.999634206295 -0.18183032 0.19018902 -0.07189204 0.106005594 -15.0765653801 19.4225852203 +7 0 0 0 0 0 0 0 0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6d177c90d5a2890afa5387d4c2a50de1cb6c852 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt @@ -0,0 +1,11 @@ +2000 ++++++ +conf1 3.86 0 79.1 0.0 
+1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +6 gpu mul fp32 1 add fp32 1 +7 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp index df193d37ebd3fef1a52f4472514c5a1d137a8f6e..bc1f9fa18e6faeed60d171ec90c4dc891136b1ad 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp @@ -412,9 +412,10 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3); @@ -511,7 +512,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp index 9482c5860dfb6688de17228980de71b1ae7844c1..59161a118d6e9baa9196d045a072993c733b3697 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = 
__hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) { } void var_14_node(void *t1, size_t 
bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_16_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) { } void var_20_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -179,7 +179,7 
@@ void var_20_node(void *t1, size_t bytes_t1) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -412,9 +412,10 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3); @@ -520,7 +521,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..488c5521dce160487ef3f3ee149914047f6274b1 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt @@ -0,0 +1,8 @@ +1 -1.8816435 2.0934134 -0.5421946 0.3710851 -0.06697306 0.040868897 -0.775027394891 0.779944300652 +2 -0.775027394891 0.779944300652 -0.42474225 0.31460348 -0.3557253 -0.17281663 -0.808667064309 0.983953297734 +3 -0.808667064309 0.983953297734 -0.44134507 0.79587924 -0.80424446 0.75330096 -0.995678424835 
0.998566448689 +4 -0.995678424835 0.998566448689 -0.2883836 0.31025785 -0.6353164 0.29015934 -0.993219196796 0.992379009724 +5 -0.993219196796 0.992379009724 -0.2792431 0.37689754 -1.1379756 1.2391574 -0.999901354313 0.999910891056 +6 -0.999901354313 0.999910891056 -0.27078503 0.27942517 -0.503003 0.12762362 -0.991036117375 0.971404970288 +7 -0.991036117375 0.971404970288 -0.24273404 0.5845544 -0.53745 0.558251 -119.27973732 -25.2262819576 +8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d6f975869964e8bb666262923172eac42a43151 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt @@ -0,0 +1,12 @@ +2000 ++++++ +conf1 2.64294896823 0 84.24999995 -0.05999995000000524 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 +6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 1 add fp32 1 +8 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp index 4c76cc7273f0d63718e324f17b22bbbd4f59b665..466e311577d1e1d46d2e0c6a2a624cc21900be4f 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp @@ -11,219 +11,219 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 
4); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 4, 4); + __hpvm__return(2, r, (size_t)0); } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_2_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_3_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_6_node(void 
*t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_7_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_10_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - 
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_13_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_16_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void 
var_17_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_20_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + 
__hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_23_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_26_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_softmax(t1); + __hpvm__return(2, r, (size_t)0); } void root(void *input, size_t input_bytes, void *conv2d_1_w, @@ -239,181 +239,181 @@ void root(void *input, size_t input_bytes, void *conv2d_1_w, void *dense_3_w, size_t dense_3_w_bytes, void *dense_3_b, size_t dense_3_b_bytes) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(17, input, 
conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, dense_3_w, dense_3_b, 0); - void *var_0 = __visc__createNodeND(0, var_0_node); + void *var_0 = __hpvm__createNodeND(0, var_0_node); - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); + __hpvm__bindIn(var_0, 0, 0, 0); + __hpvm__bindIn(var_0, 1, 1, 0); + __hpvm__bindIn(var_0, 2, 2, 0); + __hpvm__bindIn(var_0, 3, 3, 0); - void *var_1 = __visc__createNodeND(0, var_1_node); + void *var_1 = __hpvm__createNodeND(0, var_1_node); - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); + __hpvm__edge(var_0, var_1, 1, 0, 0, 0); + __hpvm__edge(var_0, var_1, 1, 1, 1, 0); + __hpvm__bindIn(var_1, 4, 2, 0); + __hpvm__bindIn(var_1, 5, 3, 0); - void *var_2 = __visc__createNodeND(0, var_2_node); + void *var_2 = __hpvm__createNodeND(0, var_2_node); - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); + __hpvm__edge(var_1, var_2, 1, 0, 0, 0); + __hpvm__edge(var_1, var_2, 1, 1, 1, 0); - void *var_3 = __visc__createNodeND(0, var_3_node); + void *var_3 = __hpvm__createNodeND(0, var_3_node); - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); + __hpvm__edge(var_2, var_3, 1, 0, 0, 0); + __hpvm__edge(var_2, var_3, 1, 1, 1, 0); - void *var_4 = __visc__createNodeND(0, var_4_node); + void *var_4 = __hpvm__createNodeND(0, var_4_node); - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 6, 2, 0); - __visc__bindIn(var_4, 7, 3, 0); + __hpvm__edge(var_3, var_4, 1, 0, 0, 0); + __hpvm__edge(var_3, var_4, 1, 1, 1, 0); + __hpvm__bindIn(var_4, 6, 2, 0); + __hpvm__bindIn(var_4, 7, 3, 0); - void *var_5 = __visc__createNodeND(0, var_5_node); + void 
*var_5 = __hpvm__createNodeND(0, var_5_node); - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - __visc__bindIn(var_5, 8, 2, 0); - __visc__bindIn(var_5, 9, 3, 0); + __hpvm__edge(var_4, var_5, 1, 0, 0, 0); + __hpvm__edge(var_4, var_5, 1, 1, 1, 0); + __hpvm__bindIn(var_5, 8, 2, 0); + __hpvm__bindIn(var_5, 9, 3, 0); - void *var_6 = __visc__createNodeND(0, var_6_node); + void *var_6 = __hpvm__createNodeND(0, var_6_node); - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); + __hpvm__edge(var_5, var_6, 1, 0, 0, 0); + __hpvm__edge(var_5, var_6, 1, 1, 1, 0); - void *var_7 = __visc__createNodeND(0, var_7_node); + void *var_7 = __hpvm__createNodeND(0, var_7_node); - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); + __hpvm__edge(var_6, var_7, 1, 0, 0, 0); + __hpvm__edge(var_6, var_7, 1, 1, 1, 0); - void *var_8 = __visc__createNodeND(0, var_8_node); + void *var_8 = __hpvm__createNodeND(0, var_8_node); - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 10, 2, 0); - __visc__bindIn(var_8, 11, 3, 0); + __hpvm__edge(var_7, var_8, 1, 0, 0, 0); + __hpvm__edge(var_7, var_8, 1, 1, 1, 0); + __hpvm__bindIn(var_8, 10, 2, 0); + __hpvm__bindIn(var_8, 11, 3, 0); - void *var_9 = __visc__createNodeND(0, var_9_node); + void *var_9 = __hpvm__createNodeND(0, var_9_node); - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - __visc__bindIn(var_9, 12, 2, 0); - __visc__bindIn(var_9, 13, 3, 0); + __hpvm__edge(var_8, var_9, 1, 0, 0, 0); + __hpvm__edge(var_8, var_9, 1, 1, 1, 0); + __hpvm__bindIn(var_9, 12, 2, 0); + __hpvm__bindIn(var_9, 13, 3, 0); - void *var_10 = __visc__createNodeND(0, var_10_node); + void *var_10 = __hpvm__createNodeND(0, var_10_node); - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); + __hpvm__edge(var_9, var_10, 1, 0, 0, 0); + __hpvm__edge(var_9, var_10, 1, 
1, 1, 0); - void *var_11 = __visc__createNodeND(0, var_11_node); + void *var_11 = __hpvm__createNodeND(0, var_11_node); - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 14, 2, 0); - __visc__bindIn(var_11, 15, 3, 0); + __hpvm__edge(var_10, var_11, 1, 0, 0, 0); + __hpvm__edge(var_10, var_11, 1, 1, 1, 0); + __hpvm__bindIn(var_11, 14, 2, 0); + __hpvm__bindIn(var_11, 15, 3, 0); - void *var_12 = __visc__createNodeND(0, var_12_node); + void *var_12 = __hpvm__createNodeND(0, var_12_node); - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - __visc__bindIn(var_12, 16, 2, 0); - __visc__bindIn(var_12, 17, 3, 0); + __hpvm__edge(var_11, var_12, 1, 0, 0, 0); + __hpvm__edge(var_11, var_12, 1, 1, 1, 0); + __hpvm__bindIn(var_12, 16, 2, 0); + __hpvm__bindIn(var_12, 17, 3, 0); - void *var_13 = __visc__createNodeND(0, var_13_node); + void *var_13 = __hpvm__createNodeND(0, var_13_node); - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); + __hpvm__edge(var_12, var_13, 1, 0, 0, 0); + __hpvm__edge(var_12, var_13, 1, 1, 1, 0); - void *var_14 = __visc__createNodeND(0, var_14_node); + void *var_14 = __hpvm__createNodeND(0, var_14_node); - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 18, 2, 0); - __visc__bindIn(var_14, 19, 3, 0); + __hpvm__edge(var_13, var_14, 1, 0, 0, 0); + __hpvm__edge(var_13, var_14, 1, 1, 1, 0); + __hpvm__bindIn(var_14, 18, 2, 0); + __hpvm__bindIn(var_14, 19, 3, 0); - void *var_15 = __visc__createNodeND(0, var_15_node); + void *var_15 = __hpvm__createNodeND(0, var_15_node); - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 20, 2, 0); - __visc__bindIn(var_15, 21, 3, 0); + __hpvm__edge(var_14, var_15, 1, 0, 0, 0); + __hpvm__edge(var_14, var_15, 1, 1, 1, 0); + __hpvm__bindIn(var_15, 20, 2, 0); + 
__hpvm__bindIn(var_15, 21, 3, 0); - void *var_16 = __visc__createNodeND(0, var_16_node); + void *var_16 = __hpvm__createNodeND(0, var_16_node); - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); + __hpvm__edge(var_15, var_16, 1, 0, 0, 0); + __hpvm__edge(var_15, var_16, 1, 1, 1, 0); - void *var_17 = __visc__createNodeND(0, var_17_node); + void *var_17 = __hpvm__createNodeND(0, var_17_node); - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); + __hpvm__edge(var_16, var_17, 1, 0, 0, 0); + __hpvm__edge(var_16, var_17, 1, 1, 1, 0); - void *var_18 = __visc__createNodeND(0, var_18_node); + void *var_18 = __hpvm__createNodeND(0, var_18_node); - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 22, 2, 0); - __visc__bindIn(var_18, 23, 3, 0); + __hpvm__edge(var_17, var_18, 1, 0, 0, 0); + __hpvm__edge(var_17, var_18, 1, 1, 1, 0); + __hpvm__bindIn(var_18, 22, 2, 0); + __hpvm__bindIn(var_18, 23, 3, 0); - void *var_19 = __visc__createNodeND(0, var_19_node); + void *var_19 = __hpvm__createNodeND(0, var_19_node); - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - __visc__bindIn(var_19, 24, 2, 0); - __visc__bindIn(var_19, 25, 3, 0); + __hpvm__edge(var_18, var_19, 1, 0, 0, 0); + __hpvm__edge(var_18, var_19, 1, 1, 1, 0); + __hpvm__bindIn(var_19, 24, 2, 0); + __hpvm__bindIn(var_19, 25, 3, 0); - void *var_20 = __visc__createNodeND(0, var_20_node); + void *var_20 = __hpvm__createNodeND(0, var_20_node); - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); + __hpvm__edge(var_19, var_20, 1, 0, 0, 0); + __hpvm__edge(var_19, var_20, 1, 1, 1, 0); - void *var_21 = __visc__createNodeND(0, var_21_node); + void *var_21 = __hpvm__createNodeND(0, var_21_node); - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 26, 2, 0); - 
__visc__bindIn(var_21, 27, 3, 0); + __hpvm__edge(var_20, var_21, 1, 0, 0, 0); + __hpvm__edge(var_20, var_21, 1, 1, 1, 0); + __hpvm__bindIn(var_21, 26, 2, 0); + __hpvm__bindIn(var_21, 27, 3, 0); - void *var_22 = __visc__createNodeND(0, var_22_node); + void *var_22 = __hpvm__createNodeND(0, var_22_node); - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - __visc__bindIn(var_22, 28, 2, 0); - __visc__bindIn(var_22, 29, 3, 0); + __hpvm__edge(var_21, var_22, 1, 0, 0, 0); + __hpvm__edge(var_21, var_22, 1, 1, 1, 0); + __hpvm__bindIn(var_22, 28, 2, 0); + __hpvm__bindIn(var_22, 29, 3, 0); - void *var_23 = __visc__createNodeND(0, var_23_node); + void *var_23 = __hpvm__createNodeND(0, var_23_node); - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); + __hpvm__edge(var_22, var_23, 1, 0, 0, 0); + __hpvm__edge(var_22, var_23, 1, 1, 1, 0); - void *var_24 = __visc__createNodeND(0, var_24_node); + void *var_24 = __hpvm__createNodeND(0, var_24_node); - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - __visc__bindIn(var_24, 30, 2, 0); - __visc__bindIn(var_24, 31, 3, 0); + __hpvm__edge(var_23, var_24, 1, 0, 0, 0); + __hpvm__edge(var_23, var_24, 1, 1, 1, 0); + __hpvm__bindIn(var_24, 30, 2, 0); + __hpvm__bindIn(var_24, 31, 3, 0); - void *var_25 = __visc__createNodeND(0, var_25_node); + void *var_25 = __hpvm__createNodeND(0, var_25_node); - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 32, 2, 0); - __visc__bindIn(var_25, 33, 3, 0); + __hpvm__edge(var_24, var_25, 1, 0, 0, 0); + __hpvm__edge(var_24, var_25, 1, 1, 1, 0); + __hpvm__bindIn(var_25, 32, 2, 0); + __hpvm__bindIn(var_25, 33, 3, 0); - void *var_26 = __visc__createNodeND(0, var_26_node); + void *var_26 = __hpvm__createNodeND(0, var_26_node); - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); + __hpvm__edge(var_25, var_26, 
1, 0, 0, 0); + __hpvm__edge(var_25, var_26, 1, 1, 1, 0); - __visc__bindOut(var_26, 0, 0, 0); - __visc__bindOut(var_26, 1, 1, 0); + __hpvm__bindOut(var_26, 0, 0, 0); + __hpvm__bindOut(var_26, 1, 1, 0); } struct ret_t { @@ -463,9 +463,9 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = - std::string("/shared/hsharif3/alexnet_imagenet_tune/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); + std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + std::string input_path = dir_prefix + std::string("input.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11); @@ -519,7 +519,7 @@ int main() { uint32_t *labels = readLabels3(labels_path.c_str(), 1000); - __visc__init(); + __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); args->input = input; @@ -557,14 +557,14 @@ int main() { args->dense_3_b = dense_3_b; args->dense_3_b_bytes = 0; - void *dfg = __visc__launch(0, root, (void *)args); + void *dfg = __hpvm__launch(0, root, (void *)args); - __visc__wait(dfg); + __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); - __visc__cleanup(); + __hpvm__cleanup(); computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp index abed45d5ff385e7117523e7d4e6e1b7a45b05018..340e0aa1194ac57e96eadd1669a97fa25fdd0c44 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp @@ -11,219 
+11,219 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 4); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 4, 4); + __hpvm__return(2, r, (size_t)0); } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_2_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_3_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { 
- __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_6_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_7_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_10_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = 
__visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_13_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); + __hpvm__return(2, r, (size_t)0); } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + 
__hpvm__return(2, r, (size_t)0); } void var_16_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_17_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); + __hpvm__return(2, r, (size_t)0); } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_20_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); 
+ __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_23_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_relu(t1); + __hpvm__return(2, r, (size_t)0); } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_mul(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); + __hpvm__hint(hpvm::TENSOR_TARGET); + __hpvm__attributes(2, t1, t2, 0); - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_add(t1, t2); + __hpvm__return(2, r, (size_t)0); } void var_26_node(void *t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __hpvm__hint(hpvm::CUDNN_TARGET); + __hpvm__attributes(1, t1, 0); - void *r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t)0); + void *r = __hpvm__tensor_softmax(t1); + __hpvm__return(2, r, (size_t)0); } void root(void *input, size_t 
input_bytes, void *conv2d_1_w, @@ -239,181 +239,181 @@ void root(void *input, size_t input_bytes, void *conv2d_1_w, void *dense_3_w, size_t dense_3_w_bytes, void *dense_3_b, size_t dense_3_b_bytes) { - __visc__hint(visc::CPU_TARGET); - __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, + __hpvm__hint(hpvm::CPU_TARGET); + __hpvm__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, dense_3_w, dense_3_b, 0); - void *var_0 = __visc__createNodeND(0, var_0_node); + void *var_0 = __hpvm__createNodeND(0, var_0_node); - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); + __hpvm__bindIn(var_0, 0, 0, 0); + __hpvm__bindIn(var_0, 1, 1, 0); + __hpvm__bindIn(var_0, 2, 2, 0); + __hpvm__bindIn(var_0, 3, 3, 0); - void *var_1 = __visc__createNodeND(0, var_1_node); + void *var_1 = __hpvm__createNodeND(0, var_1_node); - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); + __hpvm__edge(var_0, var_1, 1, 0, 0, 0); + __hpvm__edge(var_0, var_1, 1, 1, 1, 0); + __hpvm__bindIn(var_1, 4, 2, 0); + __hpvm__bindIn(var_1, 5, 3, 0); - void *var_2 = __visc__createNodeND(0, var_2_node); + void *var_2 = __hpvm__createNodeND(0, var_2_node); - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); + __hpvm__edge(var_1, var_2, 1, 0, 0, 0); + __hpvm__edge(var_1, var_2, 1, 1, 1, 0); - void *var_3 = __visc__createNodeND(0, var_3_node); + void *var_3 = __hpvm__createNodeND(0, var_3_node); - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); + __hpvm__edge(var_2, var_3, 1, 0, 0, 0); + __hpvm__edge(var_2, var_3, 1, 1, 1, 0); - void *var_4 = __visc__createNodeND(0, var_4_node); + void *var_4 = __hpvm__createNodeND(0, 
var_4_node); - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 6, 2, 0); - __visc__bindIn(var_4, 7, 3, 0); + __hpvm__edge(var_3, var_4, 1, 0, 0, 0); + __hpvm__edge(var_3, var_4, 1, 1, 1, 0); + __hpvm__bindIn(var_4, 6, 2, 0); + __hpvm__bindIn(var_4, 7, 3, 0); - void *var_5 = __visc__createNodeND(0, var_5_node); + void *var_5 = __hpvm__createNodeND(0, var_5_node); - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - __visc__bindIn(var_5, 8, 2, 0); - __visc__bindIn(var_5, 9, 3, 0); + __hpvm__edge(var_4, var_5, 1, 0, 0, 0); + __hpvm__edge(var_4, var_5, 1, 1, 1, 0); + __hpvm__bindIn(var_5, 8, 2, 0); + __hpvm__bindIn(var_5, 9, 3, 0); - void *var_6 = __visc__createNodeND(0, var_6_node); + void *var_6 = __hpvm__createNodeND(0, var_6_node); - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); + __hpvm__edge(var_5, var_6, 1, 0, 0, 0); + __hpvm__edge(var_5, var_6, 1, 1, 1, 0); - void *var_7 = __visc__createNodeND(0, var_7_node); + void *var_7 = __hpvm__createNodeND(0, var_7_node); - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); + __hpvm__edge(var_6, var_7, 1, 0, 0, 0); + __hpvm__edge(var_6, var_7, 1, 1, 1, 0); - void *var_8 = __visc__createNodeND(0, var_8_node); + void *var_8 = __hpvm__createNodeND(0, var_8_node); - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 10, 2, 0); - __visc__bindIn(var_8, 11, 3, 0); + __hpvm__edge(var_7, var_8, 1, 0, 0, 0); + __hpvm__edge(var_7, var_8, 1, 1, 1, 0); + __hpvm__bindIn(var_8, 10, 2, 0); + __hpvm__bindIn(var_8, 11, 3, 0); - void *var_9 = __visc__createNodeND(0, var_9_node); + void *var_9 = __hpvm__createNodeND(0, var_9_node); - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - __visc__bindIn(var_9, 12, 2, 0); - __visc__bindIn(var_9, 13, 3, 0); + __hpvm__edge(var_8, var_9, 1, 0, 0, 0); + 
__hpvm__edge(var_8, var_9, 1, 1, 1, 0); + __hpvm__bindIn(var_9, 12, 2, 0); + __hpvm__bindIn(var_9, 13, 3, 0); - void *var_10 = __visc__createNodeND(0, var_10_node); + void *var_10 = __hpvm__createNodeND(0, var_10_node); - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); + __hpvm__edge(var_9, var_10, 1, 0, 0, 0); + __hpvm__edge(var_9, var_10, 1, 1, 1, 0); - void *var_11 = __visc__createNodeND(0, var_11_node); + void *var_11 = __hpvm__createNodeND(0, var_11_node); - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 14, 2, 0); - __visc__bindIn(var_11, 15, 3, 0); + __hpvm__edge(var_10, var_11, 1, 0, 0, 0); + __hpvm__edge(var_10, var_11, 1, 1, 1, 0); + __hpvm__bindIn(var_11, 14, 2, 0); + __hpvm__bindIn(var_11, 15, 3, 0); - void *var_12 = __visc__createNodeND(0, var_12_node); + void *var_12 = __hpvm__createNodeND(0, var_12_node); - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - __visc__bindIn(var_12, 16, 2, 0); - __visc__bindIn(var_12, 17, 3, 0); + __hpvm__edge(var_11, var_12, 1, 0, 0, 0); + __hpvm__edge(var_11, var_12, 1, 1, 1, 0); + __hpvm__bindIn(var_12, 16, 2, 0); + __hpvm__bindIn(var_12, 17, 3, 0); - void *var_13 = __visc__createNodeND(0, var_13_node); + void *var_13 = __hpvm__createNodeND(0, var_13_node); - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); + __hpvm__edge(var_12, var_13, 1, 0, 0, 0); + __hpvm__edge(var_12, var_13, 1, 1, 1, 0); - void *var_14 = __visc__createNodeND(0, var_14_node); + void *var_14 = __hpvm__createNodeND(0, var_14_node); - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 18, 2, 0); - __visc__bindIn(var_14, 19, 3, 0); + __hpvm__edge(var_13, var_14, 1, 0, 0, 0); + __hpvm__edge(var_13, var_14, 1, 1, 1, 0); + __hpvm__bindIn(var_14, 18, 2, 0); + __hpvm__bindIn(var_14, 19, 3, 0); - void *var_15 = 
__visc__createNodeND(0, var_15_node); + void *var_15 = __hpvm__createNodeND(0, var_15_node); - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 20, 2, 0); - __visc__bindIn(var_15, 21, 3, 0); + __hpvm__edge(var_14, var_15, 1, 0, 0, 0); + __hpvm__edge(var_14, var_15, 1, 1, 1, 0); + __hpvm__bindIn(var_15, 20, 2, 0); + __hpvm__bindIn(var_15, 21, 3, 0); - void *var_16 = __visc__createNodeND(0, var_16_node); + void *var_16 = __hpvm__createNodeND(0, var_16_node); - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); + __hpvm__edge(var_15, var_16, 1, 0, 0, 0); + __hpvm__edge(var_15, var_16, 1, 1, 1, 0); - void *var_17 = __visc__createNodeND(0, var_17_node); + void *var_17 = __hpvm__createNodeND(0, var_17_node); - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); + __hpvm__edge(var_16, var_17, 1, 0, 0, 0); + __hpvm__edge(var_16, var_17, 1, 1, 1, 0); - void *var_18 = __visc__createNodeND(0, var_18_node); + void *var_18 = __hpvm__createNodeND(0, var_18_node); - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 22, 2, 0); - __visc__bindIn(var_18, 23, 3, 0); + __hpvm__edge(var_17, var_18, 1, 0, 0, 0); + __hpvm__edge(var_17, var_18, 1, 1, 1, 0); + __hpvm__bindIn(var_18, 22, 2, 0); + __hpvm__bindIn(var_18, 23, 3, 0); - void *var_19 = __visc__createNodeND(0, var_19_node); + void *var_19 = __hpvm__createNodeND(0, var_19_node); - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - __visc__bindIn(var_19, 24, 2, 0); - __visc__bindIn(var_19, 25, 3, 0); + __hpvm__edge(var_18, var_19, 1, 0, 0, 0); + __hpvm__edge(var_18, var_19, 1, 1, 1, 0); + __hpvm__bindIn(var_19, 24, 2, 0); + __hpvm__bindIn(var_19, 25, 3, 0); - void *var_20 = __visc__createNodeND(0, var_20_node); + void *var_20 = __hpvm__createNodeND(0, var_20_node); - __visc__edge(var_19, var_20, 1, 0, 0, 0); 
- __visc__edge(var_19, var_20, 1, 1, 1, 0); + __hpvm__edge(var_19, var_20, 1, 0, 0, 0); + __hpvm__edge(var_19, var_20, 1, 1, 1, 0); - void *var_21 = __visc__createNodeND(0, var_21_node); + void *var_21 = __hpvm__createNodeND(0, var_21_node); - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 26, 2, 0); - __visc__bindIn(var_21, 27, 3, 0); + __hpvm__edge(var_20, var_21, 1, 0, 0, 0); + __hpvm__edge(var_20, var_21, 1, 1, 1, 0); + __hpvm__bindIn(var_21, 26, 2, 0); + __hpvm__bindIn(var_21, 27, 3, 0); - void *var_22 = __visc__createNodeND(0, var_22_node); + void *var_22 = __hpvm__createNodeND(0, var_22_node); - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - __visc__bindIn(var_22, 28, 2, 0); - __visc__bindIn(var_22, 29, 3, 0); + __hpvm__edge(var_21, var_22, 1, 0, 0, 0); + __hpvm__edge(var_21, var_22, 1, 1, 1, 0); + __hpvm__bindIn(var_22, 28, 2, 0); + __hpvm__bindIn(var_22, 29, 3, 0); - void *var_23 = __visc__createNodeND(0, var_23_node); + void *var_23 = __hpvm__createNodeND(0, var_23_node); - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); + __hpvm__edge(var_22, var_23, 1, 0, 0, 0); + __hpvm__edge(var_22, var_23, 1, 1, 1, 0); - void *var_24 = __visc__createNodeND(0, var_24_node); + void *var_24 = __hpvm__createNodeND(0, var_24_node); - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - __visc__bindIn(var_24, 30, 2, 0); - __visc__bindIn(var_24, 31, 3, 0); + __hpvm__edge(var_23, var_24, 1, 0, 0, 0); + __hpvm__edge(var_23, var_24, 1, 1, 1, 0); + __hpvm__bindIn(var_24, 30, 2, 0); + __hpvm__bindIn(var_24, 31, 3, 0); - void *var_25 = __visc__createNodeND(0, var_25_node); + void *var_25 = __hpvm__createNodeND(0, var_25_node); - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 32, 2, 0); - __visc__bindIn(var_25, 33, 3, 0); + 
__hpvm__edge(var_24, var_25, 1, 0, 0, 0); + __hpvm__edge(var_24, var_25, 1, 1, 1, 0); + __hpvm__bindIn(var_25, 32, 2, 0); + __hpvm__bindIn(var_25, 33, 3, 0); - void *var_26 = __visc__createNodeND(0, var_26_node); + void *var_26 = __hpvm__createNodeND(0, var_26_node); - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); + __hpvm__edge(var_25, var_26, 1, 0, 0, 0); + __hpvm__edge(var_25, var_26, 1, 1, 1, 0); - __visc__bindOut(var_26, 0, 0, 0); - __visc__bindOut(var_26, 1, 1, 0); + __hpvm__bindOut(var_26, 0, 0, 0); + __hpvm__bindOut(var_26, 1, 1, 0); } struct ret_t { @@ -463,9 +463,10 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = - std::string("/shared/hsharif3/alexnet_imagenet_tune/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); + std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + + std::string input_path = dir_prefix + std::string("input.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11); @@ -519,7 +520,7 @@ int main() { // uint32_t* labels = readLabels3(labels_path.c_str(), 1000); - __visc__init(); + __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); // args->input = input; @@ -576,11 +577,11 @@ int main() { args->input = input; args->input_bytes = 0; - void *dfg = __visc__launch(0, root, (void *)args); + void *dfg = __hpvm__launch(0, root, (void *)args); - __visc__wait(dfg); + __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); @@ -590,7 +591,7 @@ int main() { } stopProfiling(); - __visc__cleanup(); + 
__hpvm__cleanup(); return 0; } diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..897937563bac79bdc4592c6a6e7ce46e41e75920 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt @@ -0,0 +1,10 @@ +1 0.0 255.0 0.5811487324237921 -0.5503702693581581 1.648145 -2.802485 0.0 1572.3096923828125 +2 0.0 1572.3096923828125 0.26272463005783797 -0.2867645202279091 0.501206 -0.47985682 0.0 3183.7813264160477 +3 0.0 3183.7813264160477 0.15785247704386754 -0.16606662392616273 0.5545839 -0.42038992 0.0 1765.4451872558668 +4 0.0 1765.4451872558668 0.11035470351576919 -0.10464580833911895 0.9042998 -1.4275751 0.0 1345.5418548586083 +5 0.0 1345.5418548586083 0.10250756608694818 -0.09240880391001702 2.4040315 -0.45662758 0.0 1227.3563232421875 +6 0.0 1227.3563232421875 0.02963459612801672 -0.030517672039568428 0.09377053 -0.07124679 0.0 1034.5966391601676 +7 0.0 1034.5966391601676 0.039147199764847845 -0.038392101023346184 0.1841282 -0.050027702 0.0 839.0697069702154 +8 0.0 839.0697069702154 0.08549865524470925 -0.05494491942599416 0.15416704 -0.16314922 -608.3993963623047 1082.8444653320819 +9 0 0 0 0 0 0 0 0 + diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..377bc6a5628a5f869ccab9723838622afcbb210c --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt @@ -0,0 +1,13 @@ +750.80768325 ++++++ +conf1 1.0 0 79.1 0.0 +1 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 relu fp32 1 +4 gpu conv fp32 1 add fp32 1 relu fp32 1 +5 gpu conv fp32 1 add fp32 1 
relu fp32 1 pool_max fp32 1 +6 gpu mul fp32 1 add fp32 1 relu fp32 1 +7 gpu mul fp32 1 add fp32 1 relu fp32 1 +8 gpu mul fp32 1 add fp32 1 +9 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a94f5c018eb44a397ea09e6f7ab3681d0c3c0f6 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt @@ -0,0 +1,4 @@ +1 0 1 -1 1 -1 1 -1 1 +2 -1 1 -1 1 -1 1 -1 1 +3 -1 1 -1 1 -1 1 -1 1 +4 -1 1 -1 1 -1 1 -1 1 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2a85f352fe024f0fcf7828c259f8549f6461e24 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs.txt @@ -0,0 +1,9 @@ +2000 ++++++ +conf1 1 0 99.69 0 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu mul fp32 1 add fp32 1 tanh fp32 1 +4 gpu mul fp32 1 add fp32 1 tanh fp32 1 +5 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp index 29564cfd423494f2d9aed778a20d010deb6fa265..3613e9f1325d73e7515a88f3e198bcd32821224c 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp @@ -265,33 +265,34 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + 
std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv1.bin"); + std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 1, 5, 5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv1_bias.bin"); + std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); void *conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2.bin"); + std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); void *conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0, 64, 32, 5, 5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2_bias.bin"); + std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); void *conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0, 1, 64, 1, 1); - std::string dense_1_w_path = dir_prefix + std::string("fc1.bin"); + std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); void *dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 3136, 1024); - std::string dense_1_b_path = dir_prefix + std::string("fc1_bias.bin"); + std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1024, 1, 1); - std::string dense_2_w_path = dir_prefix + std::string("fc2.bin"); + std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); void *dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10); - std::string dense_2_b_path = dir_prefix + std::string("fc2_bias.bin"); + std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 1, 28, 28); @@ -323,7 +324,7 @@ int main() { __hpvm__wait(dfg); - void *result = 
static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp index 8a5356581093ac574282463bed311997eae89552..9a8bfbc68fcaad4b369223c53e98121e9934b27d 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1); @@ -51,7 +51,7 
@@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_10_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, 
t2); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_tanh(t1); @@ -265,33 +265,34 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv1.bin"); + std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 1, 5, 5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv1_bias.bin"); + std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); void *conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2.bin"); + std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); void *conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0, 64, 32, 5, 5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2_bias.bin"); + std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); void *conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0, 1, 64, 1, 1); - std::string dense_1_w_path = dir_prefix + std::string("fc1.bin"); + std::string dense_1_w_path = dir_prefix + 
std::string("dense_1_w.bin"); void *dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 3136, 1024); - std::string dense_1_b_path = dir_prefix + std::string("fc1_bias.bin"); + std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1024, 1, 1); - std::string dense_2_w_path = dir_prefix + std::string("fc2.bin"); + std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); void *dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10); - std::string dense_2_b_path = dir_prefix + std::string("fc2_bias.bin"); + std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); // void* input = readTrainedWeights(input_path.c_str(), 0, 5000,1,28,28); @@ -340,7 +341,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..75211f858c1cc9eb6a186dc7f90c143ea820ef67 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/quant_ranges_rt.txt @@ -0,0 +1,15 @@ +1 -1.9892114 2.126797 -2.19630692005 1.34758170414 0.0 0.0 -60.892750473 51.9925691605 +2 0.0 5.71354155397 -0.931772116065 1.07742589378 0.0 0.0 -6.51858950329 6.81084251881 +3 0.0 4.93213940287 -0.531654466152 0.57537904036 0.0 0.0 -4.48263123512 3.96730119753 +4 0.0 4.10326339769 -0.362340988219 0.407691390038 0.0 0.0 -4.04261828327 3.8867793293 +5 0.0 5.38322130251 -0.313120054901 0.293576799393 0.0 0.0 -5.92146921539 4.33867932415 +6 0.0 4.31673815441 -0.232992478013 0.258029025793 0.0 0.0 
-4.20778994751 3.93243697071 +7 0.0 5.8304081068 -0.202337772191 0.189983081758 0.0 0.0 -6.29828691578 4.84813511753 +8 0.0 4.44641780996 -0.174427356511 0.176958308667 0.0 0.0 -4.34791088581 3.61443646955 +9 0.0 4.5180956049 -0.145467961878 0.15256431669 0.0 0.0 -3.02877027559 2.94873657799 +10 0.0 6.34857563496 -0.130258745223 0.135582433432 0.0 0.0 -4.22931008053 3.53150463724 +11 0.0 5.22100311041 -0.119001727596 0.125363747835 0.0 0.0 -4.03820378017 4.00400940704 +12 0.0 5.73249834776 -0.108397216856 0.116256686077 0.0 0.0 -3.31110151148 4.46293323326 +13 0.0 7.24049821186 -0.0862374496162 0.0885944995135 0.0 0.0 -4.17543139458 6.2043294754 +14 0.0 7.81395883465 -0.0681302513927 0.0700202777982 0.0 0.0 -10.9205664234 2.64429125786 +15 0.0 2.86920666504 -0.223010196954 0.14426593782 -0.1654396 0.23336112 -12.2459499588 23.8053251343 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed02ddab0dbef2b21f785226b80f4eee7a1735cf --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/tuner_confs.txt @@ -0,0 +1,175 @@ +1000 ++++++ +conf1 1 0 84.8 0 +1 gpu conv fp32 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 +7 gpu conv fp32 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 +13 gpu conv fp32 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 +19 gpu conv fp32 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 +25 gpu conv fp32 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 +31 gpu conv fp32 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv 
fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 +37 gpu conv fp32 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 +43 gpu conv fp32 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 +49 gpu conv fp32 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 +55 gpu conv fp32 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 +61 gpu conv fp32 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 +67 gpu conv fp32 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 +73 gpu conv fp32 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 +79 gpu conv fp32 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 +83 gpu mul fp32 1 add fp32 1 +84 gpu softmax fp32 1 +----- ++++++ +conf2 1.5 0 84.8 0 +1 gpu conv fp16 1 +2 gpu batchnorm fp16 1 +3 gpu relu fp16 1 +4 gpu group_conv fp16 1 +5 gpu batchnorm fp16 1 +6 gpu relu fp16 1 +7 gpu conv fp16 1 +8 gpu batchnorm fp16 1 +9 gpu relu fp16 1 +10 gpu group_conv fp16 1 +11 gpu batchnorm fp16 1 +12 gpu relu fp16 1 +13 gpu conv fp16 1 +14 gpu batchnorm fp16 1 +15 gpu relu fp16 1 +16 gpu group_conv fp16 1 +17 gpu batchnorm fp16 1 +18 gpu relu fp16 1 +19 gpu conv fp16 1 +20 gpu batchnorm fp16 1 +21 gpu relu fp16 1 +22 gpu group_conv fp16 1 +23 gpu batchnorm fp16 1 +24 gpu relu fp16 1 +25 gpu conv fp16 1 +26 gpu batchnorm fp16 1 +27 gpu relu fp16 1 +28 gpu group_conv fp16 1 +29 gpu batchnorm fp16 1 +30 gpu relu fp16 1 +31 gpu conv fp16 1 +32 gpu batchnorm fp16 1 +33 gpu relu fp16 1 +34 gpu group_conv fp16 1 +35 gpu batchnorm fp16 1 +36 gpu relu fp16 1 +37 gpu 
conv fp16 1 +38 gpu batchnorm fp16 1 +39 gpu relu fp16 1 +40 gpu group_conv fp16 1 +41 gpu batchnorm fp16 1 +42 gpu relu fp16 1 +43 gpu conv fp16 1 +44 gpu batchnorm fp16 1 +45 gpu relu fp16 1 +46 gpu group_conv fp16 1 +47 gpu batchnorm fp16 1 +48 gpu relu fp16 1 +49 gpu conv fp16 1 +50 gpu batchnorm fp16 1 +51 gpu relu fp16 1 +52 gpu group_conv fp16 1 +53 gpu batchnorm fp16 1 +54 gpu relu fp16 1 +55 gpu conv fp16 1 +56 gpu batchnorm fp16 1 +57 gpu relu fp16 1 +58 gpu group_conv fp16 1 +59 gpu batchnorm fp16 1 +60 gpu relu fp16 1 +61 gpu conv fp16 1 +62 gpu batchnorm fp16 1 +63 gpu relu fp16 1 +64 gpu group_conv fp16 1 +65 gpu batchnorm fp16 1 +66 gpu relu fp16 1 +67 gpu conv fp16 1 +68 gpu batchnorm fp16 1 +69 gpu relu fp16 1 +70 gpu group_conv fp16 1 +71 gpu batchnorm fp16 1 +72 gpu relu fp16 1 +73 gpu conv fp16 1 +74 gpu batchnorm fp16 1 +75 gpu relu fp16 1 +76 gpu group_conv fp16 1 +77 gpu batchnorm fp16 1 +78 gpu relu fp16 1 +79 gpu conv fp16 1 +80 gpu batchnorm fp16 1 +81 gpu relu fp16 1 +82 gpu pool_mean fp16 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp index f090669e4015854634e67b1380e3204e94034a11..b32dccabc2f29b54e8da35551f8d982cd13a378c 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp @@ -1966,6 +1966,7 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet/"; std::string input_path = dir_prefix + std::string("input.bin"); @@ -2502,7 +2503,7 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); - uint8_t *labels = readLabels(labels_path.c_str(), 
5000); + uint32_t *labels = readLabels3(labels_path.c_str(), 5000); __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); @@ -2788,10 +2789,10 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); - computeAccuracy2(labels, 5000, result); + computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp index 59044b6a1020d64509dd75bb636cce64275da249..047697767d9fa0d7f428a02eeb6b8a9566597137 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -63,7 +63,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -115,7 +115,7 @@ void var_11_node(void *t1, size_t bytes_t1) { } void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -167,7 +167,7 @@ void var_17_node(void *t1, size_t bytes_t1) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = 
__hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -219,7 +219,7 @@ void var_23_node(void *t1, size_t bytes_t1) { } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -271,7 +271,7 @@ void var_29_node(void *t1, size_t bytes_t1) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -323,7 +323,7 @@ void var_35_node(void *t1, size_t bytes_t1) { } void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -375,7 +375,7 @@ void var_41_node(void *t1, size_t bytes_t1) { } void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -427,7 +427,7 @@ void var_47_node(void *t1, size_t bytes_t1) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -479,7 +479,7 @@ void var_53_node(void *t1, size_t bytes_t1) { } void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -531,7 +531,7 @@ void var_59_node(void *t1, size_t bytes_t1) { } void var_60_node(void *t1, size_t 
bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -583,7 +583,7 @@ void var_65_node(void *t1, size_t bytes_t1) { } void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -635,7 +635,7 @@ void var_71_node(void *t1, size_t bytes_t1) { } void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -687,7 +687,7 @@ void var_77_node(void *t1, size_t bytes_t1) { } void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1); @@ -721,7 +721,7 @@ void var_81_node(void *t1, size_t bytes_t1) { } void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -729,7 +729,7 @@ void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_83_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -1966,9 +1966,6 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { - - // std::string dir_prefix = - // std::string("../../../../../projects/hpvm-tensor-rt/model_params/mobilenet_quant/"); std::string dir_prefix = std::string(MODEL_PARAMS_DIR) 
+ "/mobilenet/"; std::string input_path = dir_prefix + std::string("input.bin"); @@ -2811,7 +2808,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a7b14d7348f424556ba5e7bb52b6fdf9bbbd89c --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/quant_ranges_rt.txt @@ -0,0 +1,22 @@ +1 -0.5500815 0.60786617 -1.0248864 1.2929907 -0.36291853 0.2533059 0.0 0.753551840782 +2 0.0 0.753551840782 -0.69884616 0.71849966 -0.2781147 0.45571187 0.0 1.01057458043 +3 0.0 1.01057458043 -0.59568167 0.7714691 -0.8602873 0.19743633 -1.84771883726 1.87930787086 +4 0.0 2.33981014252 -0.41976976 0.43748936 -0.7021962 0.3033103 0.0 1.04317724705 +5 0.0 1.04317724705 -0.46757826 0.4635873 -0.20662616 0.1778044 -0.829483509064 0.786805033684 +6 0.0 2.49733686686 -0.64404047 0.45383143 -0.819547 0.38550296 0.0 0.897360802293 +7 0.0 0.897360802293 -0.41986948 0.33654243 -0.3563013 0.22371122 -0.957150224447 0.54919362247 +8 0.0 2.37362146616 -0.4805263 0.50655717 -0.296758 0.7742441 0.0 3.01592136621 +9 0.0 3.01592136621 -0.52083415 0.45517674 -0.20242067 0.8236838 -5.2759475708 5.79733039856 +10 0.0 2.37362146616 -0.5338656 1.3395424 -0.20242067 0.8236838 -0.738995380998 2.33600783587 +11 0.0 7.07933432579 -0.34429058 0.43629733 -1.0744808 0.056708273 0.0 1.58645607233 +12 0.0 1.58645607233 -0.30342352 0.39493486 -0.44630566 0.6492069 -1.49672914267 1.29970229745 +13 0.0 7.11914063454 -0.38351893 0.45775774 -1.4733055 -0.014426912 0.0 1.52876508832 +14 0.0 1.52876508832 -0.25695276 0.45372736 -0.5259744 0.26591402 -1.59576894164 1.08074297309 +15 0.0 6.94405080318 
-0.55299705 0.5443531 -0.71790683 1.2730768 0.0 10.3651468277 +16 0.0 10.3651468277 -0.4203967 0.48641303 -0.90653443 1.3546854 -22.372925148 17.2033731079 +17 0.0 6.94405080318 -0.4365755 0.84913826 -0.90653443 1.3546851 -3.66810325861 4.87814051151 +18 0.0 18.8401451111 -0.38657624 0.5228989 -1.2083547 0.76361173 0.0 19.1229192352 +19 0.0 19.1229192352 -0.40857902 0.575035 -1.8731614 1.0960501 -31.3229312897 14.8234729958 +20 0.0 23.7382488823 -0.33079496 0.5893278 -1.0234511 1.0016295 0.0 19.5892774963 +21 0.0 19.5892774963 -0.27897888 0.38280907 -2.2086356 1.0066502 -34.4416886902 20.9890329933 +22 0.0 10.8541981602 -1.5092047 1.0279838 -0.49379802 0.61032647 -40.9121678543 25.7082381058 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a414afad320525deb15bdd32f35c1a1ac4699be --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/tuner_confs.txt @@ -0,0 +1,91 @@ +2000 ++++++ +conf1 1 0 89.59 0 +1 gpu conv fp32 1 add fp32 1 relu fp32 1 +2 gpu conv fp32 1 add fp32 1 relu fp32 1 +3 gpu conv fp32 1 add fp32 1 +4 gpu add fp32 1 +5 gpu relu fp32 1 +6 gpu conv fp32 1 add fp32 1 relu fp32 1 +7 gpu conv fp32 1 add fp32 1 +8 gpu add fp32 1 +9 gpu relu fp32 1 +10 gpu conv fp32 1 add fp32 1 relu fp32 1 +11 gpu conv fp32 1 add fp32 1 +12 gpu add fp32 1 +13 gpu relu fp32 1 +14 gpu conv fp32 1 add fp32 1 relu fp32 1 +15 gpu conv fp32 1 add fp32 1 +16 gpu conv fp32 1 add fp32 1 +17 gpu add fp32 1 +18 gpu relu fp32 1 +19 gpu conv fp32 1 add fp32 1 relu fp32 1 +20 gpu conv fp32 1 add fp32 1 +21 gpu add fp32 1 +22 gpu relu fp32 1 +23 gpu conv fp32 1 add fp32 1 relu fp32 1 +24 gpu conv fp32 1 add fp32 1 +25 gpu add fp32 1 +26 gpu relu fp32 1 +27 gpu conv fp32 1 add fp32 1 relu fp32 1 +28 gpu conv fp32 1 add fp32 1 +29 gpu conv fp32 1 add fp32 1 +30 gpu add fp32 1 +31 gpu relu fp32 1 +32 gpu conv fp32 1 
add fp32 1 relu fp32 1 +33 gpu conv fp32 1 add fp32 1 +34 gpu add fp32 1 +35 gpu relu fp32 1 +36 gpu conv fp32 1 add fp32 1 relu fp32 1 +37 gpu conv fp32 1 add fp32 1 +38 gpu add fp32 1 +39 gpu relu fp32 1 +40 gpu pool_mean fp32 1 +41 gpu mul fp32 1 add fp32 1 +42 gpu softmax fp32 1 +----- ++++++ +conf2 1.5 0 89.59 0 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 +4 gpu add fp16 1 +5 gpu relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 +8 gpu add fp16 1 +9 gpu relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 +12 gpu add fp16 1 +13 gpu relu fp16 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 +17 gpu add fp16 1 +18 gpu relu fp16 1 +19 gpu conv fp16 1 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 +21 gpu add fp16 1 +22 gpu relu fp16 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 +25 gpu add fp16 1 +26 gpu relu fp16 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 +30 gpu add fp16 1 +31 gpu relu fp16 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 +34 gpu add fp16 1 +35 gpu relu fp16 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 +38 gpu add fp16 1 +39 gpu relu fp16 1 +40 gpu pool_mean fp16 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp index 09ca0f052985236a6e12fd9ea661d2a8640b48a0..d9f96cfdac18876b676369ba2c7c0e8f4e2ea986 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp @@ -1227,11 +1227,12 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = 
std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("input.bin"); void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = @@ -1462,7 +1463,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp index dc3097f992900e1263264ddf9da133ac25c6ab47..6bf5a58135d7fe7101c359a29f8909937d9bc8c7 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp @@ -12,7 +12,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(1); @@ -21,7 +21,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(2); @@ -30,7 +30,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(3); @@ -39,7 +39,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) 
{ - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(4); @@ -48,7 +48,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(5); @@ -57,7 +57,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(6); @@ -66,7 +66,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(7); @@ -75,7 +75,7 @@ void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(8); @@ -102,7 +102,7 @@ void var_9_node(void *t1, size_t bytes_t1) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(11); @@ -111,7 +111,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(12); @@ -120,7 +120,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(13); @@ -129,7 +129,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(14); @@ -138,7 +138,7 @@ void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(15); @@ -165,7 +165,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(18); @@ -174,7 +174,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(19); @@ -183,7 +183,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(20); @@ -192,7 +192,7 @@ void var_19_node(void *t1, size_t bytes_t1) { } void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(21); @@ -201,7 +201,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(22); @@ -228,7 +228,7 @@ void var_23_node(void *t1, size_t bytes_t1) { } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(25); @@ -237,7 +237,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(26); @@ -246,7 +246,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_26_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(27); @@ -255,7 +255,7 @@ void var_26_node(void *t1, size_t bytes_t1) { } void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(28); @@ -264,7 +264,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(29); @@ -273,7 +273,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(30); @@ -282,7 +282,7 @@ void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); 
+ __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(31); @@ -309,7 +309,7 @@ void var_32_node(void *t1, size_t bytes_t1) { } void var_33_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(34); @@ -318,7 +318,7 @@ void var_33_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(35); @@ -327,7 +327,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_35_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(36); @@ -336,7 +336,7 @@ void var_35_node(void *t1, size_t bytes_t1) { } void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(37); @@ -345,7 +345,7 @@ void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(38); @@ -372,7 +372,7 @@ void var_39_node(void *t1, size_t bytes_t1) { } void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(41); @@ -381,7 +381,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(42); @@ -390,7 +390,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_42_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(43); @@ -399,7 +399,7 @@ void var_42_node(void *t1, size_t bytes_t1) { } void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(44); @@ -408,7 +408,7 @@ void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(45); @@ -435,7 +435,7 @@ void var_46_node(void *t1, size_t bytes_t1) { } void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(48); @@ -444,7 +444,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(49); @@ -453,7 +453,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_49_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(50); @@ -462,7 +462,7 @@ void var_49_node(void *t1, size_t bytes_t1) { } void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, 
t1, t2, 0); __hpvm__node_id(51); @@ -471,7 +471,7 @@ void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(52); @@ -480,7 +480,7 @@ void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(53); @@ -489,7 +489,7 @@ void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(54); @@ -516,7 +516,7 @@ void var_55_node(void *t1, size_t bytes_t1) { } void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(57); @@ -525,7 +525,7 @@ void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(58); @@ -534,7 +534,7 @@ void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_58_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(59); @@ -543,7 +543,7 @@ void var_58_node(void *t1, size_t bytes_t1) { } void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); 
__hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(60); @@ -552,7 +552,7 @@ void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(61); @@ -579,7 +579,7 @@ void var_62_node(void *t1, size_t bytes_t1) { } void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(64); @@ -588,7 +588,7 @@ void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(65); @@ -597,7 +597,7 @@ void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_65_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(66); @@ -606,7 +606,7 @@ void var_65_node(void *t1, size_t bytes_t1) { } void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(67); @@ -615,7 +615,7 @@ void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(68); @@ -651,7 +651,7 @@ void var_70_node(void *t1, size_t bytes_t1) { } void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, 
t2, 0); __hpvm__node_id(72); @@ -660,7 +660,7 @@ void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(73); @@ -1305,7 +1305,7 @@ int main() { std::string input_path = dir_prefix + std::string("input.bin"); // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); // uint32_t* labels = readLabels3(labels_path.c_str(),5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -1562,7 +1562,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..e82fdcdca684bc5b836ab2cd80ea397766071d2c --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/quant_ranges_rt.txt @@ -0,0 +1,57 @@ +1 0 0 0 0 0 0 0 0 +2 0 0 0 0 0 0 0 0 +3 0 0 0 0 0 0 0 0 +4 0 0 0 0 0 0 0 0 +5 0 0 0 0 0 0 0 0 +6 0 0 0 0 0 0 0 0 +7 0 0 0 0 0 0 0 0 +8 0 0 0 0 0 0 0 0 +9 0 0 0 0 0 0 0 0 +10 0 0 0 0 0 0 0 0 +11 0 0 0 0 0 0 0 0 +12 0 0 0 0 0 0 0 0 +13 0 0 0 0 0 0 0 0 +14 0 0 0 0 0 0 0 0 +15 0 0 0 0 0 0 0 0 +16 0 0 0 0 0 0 0 0 +17 0 0 0 0 0 0 0 0 +18 0 0 0 0 0 0 0 0 +19 0 0 0 0 0 0 0 0 +20 0 0 0 0 0 0 0 0 +21 0 0 0 0 0 0 0 0 +22 0 0 0 0 0 0 0 0 +23 0 0 0 0 0 0 0 0 +24 0 0 0 0 0 0 0 0 +25 0 0 0 0 0 0 0 0 +26 0 0 0 0 0 0 0 0 +27 0 0 0 0 0 0 0 0 +28 0 0 0 0 0 0 0 0 +29 0 0 0 0 0 
0 0 0 +30 0 0 0 0 0 0 0 0 +31 0 0 0 0 0 0 0 0 +32 0 0 0 0 0 0 0 0 +33 0 0 0 0 0 0 0 0 +34 0 0 0 0 0 0 0 0 +35 0 0 0 0 0 0 0 0 +36 0 0 0 0 0 0 0 0 +37 0 0 0 0 0 0 0 0 +38 0 0 0 0 0 0 0 0 +39 0 0 0 0 0 0 0 0 +40 0 0 0 0 0 0 0 0 +41 0 0 0 0 0 0 0 0 +42 0 0 0 0 0 0 0 0 +43 0 0 0 0 0 0 0 0 +44 0 0 0 0 0 0 0 0 +45 0 0 0 0 0 0 0 0 +46 0 0 0 0 0 0 0 0 +47 0 0 0 0 0 0 0 0 +48 0 0 0 0 0 0 0 0 +49 0 0 0 0 0 0 0 0 +50 0 0 0 0 0 0 0 0 +51 0 0 0 0 0 0 0 0 +52 0 0 0 0 0 0 0 0 +53 0 0 0 0 0 0 0 0 +54 0 0 0 0 0 0 0 0 +55 0 0 0 0 0 0 0 0 +56 0 0 0 0 0 0 0 0 +57 0 0 0 0 0 0 0 0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..ede27ce6f5952d4d1be47640a46771d1f4c51ab2 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/data/tuner_confs.txt @@ -0,0 +1,177 @@ +7161.053769000008 ++++++ +conf1 1 1 75.7 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +2 gpu batchnorm fp32 11 +3 gpu conv fp32 11 add fp32 1 +4 gpu batchnorm fp32 11 +5 gpu relu fp32 11 +6 gpu conv fp32 11 add fp32 1 +7 gpu batchnorm fp32 11 +8 gpu relu fp32 11 +9 gpu conv fp32 11 add fp32 1 +10 gpu batchnorm fp32 11 +11 gpu conv fp32 11 add fp32 1 +12 gpu batchnorm fp32 11 +13 gpu add fp32 11 +14 gpu relu fp32 11 +15 gpu conv fp32 11 add fp32 1 +16 gpu batchnorm fp32 11 +17 gpu relu fp32 11 +18 gpu conv fp32 11 add fp32 1 +19 gpu batchnorm fp32 11 +20 gpu relu fp32 11 +21 gpu conv fp32 11 add fp32 1 +22 gpu batchnorm fp32 11 +23 gpu add fp32 11 +24 gpu relu fp32 11 +25 gpu conv fp32 11 add fp32 1 +26 gpu batchnorm fp32 11 +27 gpu relu fp32 11 +28 gpu conv fp32 11 add fp32 1 +29 gpu batchnorm fp32 11 +30 gpu relu fp32 11 +31 gpu conv fp32 11 add fp32 1 +32 gpu batchnorm fp32 11 +33 gpu add fp32 11 +34 gpu relu fp32 11 +35 gpu conv fp32 11 add fp32 1 +36 gpu batchnorm fp32 11 +37 gpu relu fp32 11 +38 gpu conv 
fp32 11 add fp32 1 +39 gpu batchnorm fp32 11 +40 gpu relu fp32 11 +41 gpu conv fp32 11 add fp32 1 +42 gpu batchnorm fp32 11 +43 gpu conv fp32 11 add fp32 1 +44 gpu batchnorm fp32 11 +45 gpu add fp32 11 +46 gpu relu fp32 11 +47 gpu conv fp32 11 add fp32 1 +48 gpu batchnorm fp32 11 +49 gpu relu fp32 11 +50 gpu conv fp32 11 add fp32 1 +51 gpu batchnorm fp32 11 +52 gpu relu fp32 11 +53 gpu conv fp32 11 add fp32 1 +54 gpu batchnorm fp32 11 +55 gpu add fp32 11 +56 gpu relu fp32 11 +57 gpu conv fp32 11 add fp32 1 +58 gpu batchnorm fp32 11 +59 gpu relu fp32 11 +60 gpu conv fp32 11 add fp32 1 +61 gpu batchnorm fp32 11 +62 gpu relu fp32 11 +63 gpu conv fp32 11 add fp32 1 +64 gpu batchnorm fp32 11 +65 gpu add fp32 11 +66 gpu relu fp32 11 +67 gpu conv fp32 11 add fp32 1 +68 gpu batchnorm fp32 11 +69 gpu relu fp32 11 +70 gpu conv fp32 11 add fp32 1 +71 gpu batchnorm fp32 11 +72 gpu relu fp32 11 +73 gpu conv fp32 11 add fp32 1 +74 gpu batchnorm fp32 11 +75 gpu add fp32 11 +76 gpu relu fp32 11 +77 gpu conv fp32 11 add fp32 1 +78 gpu batchnorm fp32 11 +79 gpu relu fp32 11 +80 gpu conv fp32 11 add fp32 1 +81 gpu batchnorm fp32 11 +82 gpu relu fp32 11 +83 gpu conv fp32 11 add fp32 1 +84 gpu batchnorm fp32 11 +85 gpu conv fp32 11 add fp32 1 +86 gpu batchnorm fp32 11 +87 gpu add fp32 11 +88 gpu relu fp32 11 +89 gpu conv fp32 11 add fp32 1 +90 gpu batchnorm fp32 11 +91 gpu relu fp32 11 +92 gpu conv fp32 11 add fp32 1 +93 gpu batchnorm fp32 11 +94 gpu relu fp32 11 +95 gpu conv fp32 11 add fp32 1 +96 gpu batchnorm fp32 11 +97 gpu add fp32 11 +98 gpu relu fp32 11 +99 gpu conv fp32 11 add fp32 1 +100 gpu batchnorm fp32 11 +101 gpu relu fp32 11 +102 gpu conv fp32 11 add fp32 1 +103 gpu batchnorm fp32 11 +104 gpu relu fp32 11 +105 gpu conv fp32 11 add fp32 1 +106 gpu batchnorm fp32 11 +107 gpu add fp32 11 +108 gpu relu fp32 11 +109 gpu conv fp32 11 add fp32 1 +110 gpu batchnorm fp32 11 +111 gpu relu fp32 11 +112 gpu conv fp32 11 add fp32 1 +113 gpu batchnorm fp32 11 +114 gpu relu fp32 11 
+115 gpu conv fp32 11 add fp32 1 +116 gpu batchnorm fp32 11 +117 gpu add fp32 11 +118 gpu relu fp32 11 +119 gpu conv fp32 11 add fp32 1 +120 gpu batchnorm fp32 11 +121 gpu relu fp32 11 +122 gpu conv fp32 11 add fp32 1 +123 gpu batchnorm fp32 11 +124 gpu relu fp32 11 +125 gpu conv fp32 11 add fp32 1 +126 gpu batchnorm fp32 11 +127 gpu add fp32 11 +128 gpu relu fp32 11 +129 gpu conv fp32 11 add fp32 1 +130 gpu batchnorm fp32 11 +131 gpu relu fp32 11 +132 gpu conv fp32 11 add fp32 1 +133 gpu batchnorm fp32 11 +134 gpu relu fp32 11 +135 gpu conv fp32 11 add fp32 1 +136 gpu batchnorm fp32 11 +137 gpu add fp32 11 +138 gpu relu fp32 11 +139 gpu conv fp32 11 add fp32 1 +140 gpu batchnorm fp32 11 +141 gpu relu fp32 11 +142 gpu conv fp32 11 add fp32 1 +143 gpu batchnorm fp32 11 +144 gpu relu fp32 11 +145 gpu conv fp32 11 add fp32 1 +146 gpu batchnorm fp32 11 +147 gpu conv fp32 11 add fp32 1 +148 gpu batchnorm fp32 11 +149 gpu add fp32 11 +150 gpu relu fp32 11 +151 gpu conv fp32 11 add fp32 1 +152 gpu batchnorm fp32 11 +153 gpu relu fp32 11 +154 gpu conv fp32 11 add fp32 1 +155 gpu batchnorm fp32 11 +156 gpu relu fp32 11 +157 gpu conv fp32 11 add fp32 1 +158 gpu batchnorm fp32 11 +159 gpu add fp32 11 +160 gpu relu fp32 11 +161 gpu conv fp32 11 add fp32 1 +162 gpu batchnorm fp32 11 +163 gpu relu fp32 11 +164 gpu conv fp32 11 add fp32 1 +165 gpu batchnorm fp32 11 +166 gpu relu fp32 11 +167 gpu conv fp32 11 add fp32 1 +168 gpu batchnorm fp32 11 +169 gpu add fp32 11 +170 gpu relu fp32 11 +171 gpu pool_max fp32 11 +172 gpu mul fp32 11 add fp32 1 +173 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp index a95fc1dbaf400c0b189babf75dc2b37df6a4587d..c4bd6be08b5afad0367e93f640c54b45e7d41938 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp +++ 
b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp @@ -4905,7 +4905,8 @@ typedef struct __attribute__((__packed__)) { int main() { - std::string dir_prefix = std::string("/home/hsharif3/resnet50_imagenet/"); + std::string dir_prefix = + std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -6733,7 +6734,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp index aa62ea8c9b41d7df6525ab24bf1bbe346a8b790f..42bad74ac39511a64ee4fd20e589cec5caf14836 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(1); @@ -20,7 +20,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(2); @@ -29,7 +29,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(3); @@ -38,7 +38,7 @@ 
void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(4); @@ -49,7 +49,7 @@ void var_3_node(void *t1, size_t bytes_t1) { void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(5); @@ -58,7 +58,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(6); @@ -67,7 +67,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(7); @@ -78,7 +78,7 @@ void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(8); @@ -87,7 +87,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_8_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(9); @@ -96,7 +96,7 @@ void var_8_node(void *t1, size_t bytes_t1) { } void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(10); @@ -105,7 +105,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(11); @@ -116,7 +116,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(12); @@ -125,7 +125,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(13); @@ -134,7 +134,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(14); @@ -143,7 +143,7 @@ void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(15); @@ -154,7 +154,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); 
__hpvm__node_id(16); @@ -163,7 +163,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(17); @@ -172,7 +172,7 @@ void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(18); @@ -183,7 +183,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(19); @@ -192,7 +192,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(20); @@ -201,7 +201,7 @@ void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_20_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(21); @@ -210,7 +210,7 @@ void var_20_node(void *t1, size_t bytes_t1) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(22); @@ -219,7 +219,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_22_node(void *t1, size_t bytes_t1, 
void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(23); @@ -230,7 +230,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_23_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(24); @@ -239,7 +239,7 @@ void var_23_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_24_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(25); @@ -248,7 +248,7 @@ void var_24_node(void *t1, size_t bytes_t1) { } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(26); @@ -257,7 +257,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(27); @@ -268,7 +268,7 @@ void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(28); @@ -277,7 +277,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_28_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(29); @@ -286,7 +286,7 @@ void var_28_node(void *t1, size_t bytes_t1) { } void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(30); @@ -295,7 +295,7 @@ void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(31); @@ -306,7 +306,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(32); @@ -315,7 +315,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_32_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(33); @@ -324,7 +324,7 @@ void var_32_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_33_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(34); @@ -333,7 +333,7 @@ void var_33_node(void *t1, size_t bytes_t1) { } void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(35); @@ -342,7 +342,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void 
var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(36); @@ -353,7 +353,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(37); @@ -362,7 +362,7 @@ void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_37_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(38); @@ -371,7 +371,7 @@ void var_37_node(void *t1, size_t bytes_t1) { } void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(39); @@ -380,7 +380,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(40); @@ -391,7 +391,7 @@ void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(41); @@ -400,7 +400,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_41_node(void *t1, size_t bytes_t1) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(42); @@ -409,7 +409,7 @@ void var_41_node(void *t1, size_t bytes_t1) { } void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(43); @@ -418,7 +418,7 @@ void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(44); @@ -429,7 +429,7 @@ void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(45); @@ -438,7 +438,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(46); @@ -447,7 +447,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_46_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(47); @@ -456,7 +456,7 @@ void var_46_node(void *t1, size_t bytes_t1) { } void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(48); @@ -465,7 +465,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, 
size_t bytes_t2) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(49); @@ -476,7 +476,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(50); @@ -485,7 +485,7 @@ void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_50_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(51); @@ -494,7 +494,7 @@ void var_50_node(void *t1, size_t bytes_t1) { } void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(52); @@ -503,7 +503,7 @@ void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(53); @@ -514,7 +514,7 @@ void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(54); @@ -523,7 +523,7 @@ void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_54_node(void *t1, size_t bytes_t1) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(55); @@ -532,7 +532,7 @@ void var_54_node(void *t1, size_t bytes_t1) { } void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(56); @@ -541,7 +541,7 @@ void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(57); @@ -552,7 +552,7 @@ void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(58); @@ -561,7 +561,7 @@ void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(59); @@ -570,7 +570,7 @@ void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(60); @@ -581,7 +581,7 @@ void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(61); @@ -590,7 +590,7 @@ void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(62); @@ -599,7 +599,7 @@ void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_62_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(63); @@ -608,7 +608,7 @@ void var_62_node(void *t1, size_t bytes_t1) { } void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(64); @@ -617,7 +617,7 @@ void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(65); @@ -628,7 +628,7 @@ void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_65_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(66); @@ -637,7 +637,7 @@ void var_65_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_66_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(67); @@ -646,7 +646,7 @@ void var_66_node(void *t1, size_t bytes_t1) { } void var_67_node(void 
*t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(68); @@ -655,7 +655,7 @@ void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(69); @@ -666,7 +666,7 @@ void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(70); @@ -675,7 +675,7 @@ void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_70_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(71); @@ -684,7 +684,7 @@ void var_70_node(void *t1, size_t bytes_t1) { } void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(72); @@ -693,7 +693,7 @@ void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(73); @@ -704,7 +704,7 @@ void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(74); @@ -713,7 +713,7 @@ void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_74_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(75); @@ -722,7 +722,7 @@ void var_74_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_75_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(76); @@ -731,7 +731,7 @@ void var_75_node(void *t1, size_t bytes_t1) { } void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(77); @@ -740,7 +740,7 @@ void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(78); @@ -751,7 +751,7 @@ void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(79); @@ -760,7 +760,7 @@ void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_79_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(80); @@ -769,7 +769,7 @@ void var_79_node(void *t1, size_t bytes_t1) { } void var_80_node(void 
*t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(81); @@ -778,7 +778,7 @@ void var_80_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(82); @@ -789,7 +789,7 @@ void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(83); @@ -798,7 +798,7 @@ void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_83_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(84); @@ -807,7 +807,7 @@ void var_83_node(void *t1, size_t bytes_t1) { } void var_84_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(85); @@ -816,7 +816,7 @@ void var_84_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(86); @@ -827,7 +827,7 @@ void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_86_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(87); @@ -836,7 +836,7 @@ void var_86_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_87_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(88); @@ -845,7 +845,7 @@ void var_87_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_88_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(89); @@ -854,7 +854,7 @@ void var_88_node(void *t1, size_t bytes_t1) { } void var_89_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(90); @@ -863,7 +863,7 @@ void var_89_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(91); @@ -874,7 +874,7 @@ void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_91_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(92); @@ -883,7 +883,7 @@ void var_91_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_92_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(93); @@ -892,7 +892,7 @@ void var_92_node(void *t1, size_t bytes_t1) { } void var_93_node(void 
*t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(94); @@ -901,7 +901,7 @@ void var_93_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(95); @@ -912,7 +912,7 @@ void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_95_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(96); @@ -921,7 +921,7 @@ void var_95_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_96_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(97); @@ -930,7 +930,7 @@ void var_96_node(void *t1, size_t bytes_t1) { } void var_97_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(98); @@ -939,7 +939,7 @@ void var_97_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(99); @@ -950,7 +950,7 @@ void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_99_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(100); @@ -959,7 +959,7 @@ void var_99_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, } void var_100_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(101); @@ -968,7 +968,7 @@ void var_100_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_101_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(102); @@ -977,7 +977,7 @@ void var_101_node(void *t1, size_t bytes_t1) { } void var_102_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(103); @@ -986,7 +986,7 @@ void var_102_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(104); @@ -997,7 +997,7 @@ void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_104_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(105); @@ -1006,7 +1006,7 @@ void var_104_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_105_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(106); @@ -1015,7 +1015,7 @@ void var_105_node(void *t1, size_t bytes_t1) { } void 
var_106_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(107); @@ -1024,7 +1024,7 @@ void var_106_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(108); @@ -1035,7 +1035,7 @@ void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_108_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(109); @@ -1044,7 +1044,7 @@ void var_108_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_109_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(110); @@ -1053,7 +1053,7 @@ void var_109_node(void *t1, size_t bytes_t1) { } void var_110_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(111); @@ -1062,7 +1062,7 @@ void var_110_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(112); @@ -1073,7 +1073,7 @@ void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_112_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(113); @@ -1082,7 +1082,7 @@ void var_112_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_113_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(114); @@ -1091,7 +1091,7 @@ void var_113_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(115); @@ -1102,7 +1102,7 @@ void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_115_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(116); @@ -1111,7 +1111,7 @@ void var_115_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_116_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(117); @@ -1120,7 +1120,7 @@ void var_116_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_117_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(118); @@ -1129,7 +1129,7 @@ void var_117_node(void *t1, size_t bytes_t1) { } void var_118_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(119); @@ -1138,7 
+1138,7 @@ void var_118_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(120); @@ -1149,7 +1149,7 @@ void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_120_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(121); @@ -1158,7 +1158,7 @@ void var_120_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_121_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(122); @@ -1167,7 +1167,7 @@ void var_121_node(void *t1, size_t bytes_t1) { } void var_122_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(123); @@ -1176,7 +1176,7 @@ void var_122_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(124); @@ -1187,7 +1187,7 @@ void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_124_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(125); @@ -1196,7 +1196,7 @@ void var_124_node(void *t1, size_t bytes_t1, 
void *t2, size_t bytes_t2, } void var_125_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(126); @@ -1205,7 +1205,7 @@ void var_125_node(void *t1, size_t bytes_t1) { } void var_126_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(127); @@ -1214,7 +1214,7 @@ void var_126_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(128); @@ -1225,7 +1225,7 @@ void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_128_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(129); @@ -1234,7 +1234,7 @@ void var_128_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_129_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(130); @@ -1243,7 +1243,7 @@ void var_129_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_130_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(131); @@ -1252,7 +1252,7 @@ void var_130_node(void *t1, size_t bytes_t1) { } void var_131_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 
0); __hpvm__node_id(132); @@ -1261,7 +1261,7 @@ void var_131_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(133); @@ -1272,7 +1272,7 @@ void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_133_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(134); @@ -1281,7 +1281,7 @@ void var_133_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_134_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(135); @@ -1290,7 +1290,7 @@ void var_134_node(void *t1, size_t bytes_t1) { } void var_135_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(136); @@ -1299,7 +1299,7 @@ void var_135_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(137); @@ -1310,7 +1310,7 @@ void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_137_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(138); @@ -1319,7 +1319,7 @@ void 
var_137_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_138_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(139); @@ -1328,7 +1328,7 @@ void var_138_node(void *t1, size_t bytes_t1) { } void var_139_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(140); @@ -1337,7 +1337,7 @@ void var_139_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(141); @@ -1348,7 +1348,7 @@ void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_141_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(142); @@ -1357,7 +1357,7 @@ void var_141_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_142_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(143); @@ -1366,7 +1366,7 @@ void var_142_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_143_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(144); @@ -1375,7 +1375,7 @@ void var_143_node(void *t1, size_t bytes_t1) { } void var_144_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(145); @@ -1384,7 +1384,7 @@ void var_144_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(146); @@ -1395,7 +1395,7 @@ void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_146_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(147); @@ -1404,7 +1404,7 @@ void var_146_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_147_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(148); @@ -1413,7 +1413,7 @@ void var_147_node(void *t1, size_t bytes_t1) { } void var_148_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(149); @@ -1422,7 +1422,7 @@ void var_148_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(150); @@ -1433,7 +1433,7 @@ void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_150_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, 
t4, t5, 0); __hpvm__node_id(151); @@ -1442,7 +1442,7 @@ void var_150_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_151_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(152); @@ -1451,7 +1451,7 @@ void var_151_node(void *t1, size_t bytes_t1) { } void var_152_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(153); @@ -1460,7 +1460,7 @@ void var_152_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(154); @@ -1471,7 +1471,7 @@ void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_154_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(155); @@ -1480,7 +1480,7 @@ void var_154_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_155_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(156); @@ -1489,7 +1489,7 @@ void var_155_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_156_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(157); @@ -1498,7 +1498,7 @@ void var_156_node(void *t1, size_t bytes_t1) { } void var_157_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(158); @@ -1507,7 +1507,7 @@ void var_157_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(159); @@ -1518,7 +1518,7 @@ void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_159_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(160); @@ -1527,7 +1527,7 @@ void var_159_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_160_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(161); @@ -1536,7 +1536,7 @@ void var_160_node(void *t1, size_t bytes_t1) { } void var_161_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(162); @@ -1545,7 +1545,7 @@ void var_161_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(163); @@ -1556,7 +1556,7 @@ void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_163_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(164); @@ -1565,7 +1565,7 @@ void var_163_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_164_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(165); @@ -1574,7 +1574,7 @@ void var_164_node(void *t1, size_t bytes_t1) { } void var_165_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(166); @@ -1583,7 +1583,7 @@ void var_165_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(167); @@ -1594,7 +1594,7 @@ void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_167_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(168); @@ -1603,7 +1603,7 @@ void var_167_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_168_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(169); @@ -1612,7 +1612,7 @@ void var_168_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_169_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(170); @@ -1621,7 +1621,7 @@ void var_169_node(void *t1, size_t bytes_t1) { } void 
var_170_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(171); @@ -1630,7 +1630,7 @@ void var_170_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(172); @@ -1641,7 +1641,7 @@ void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_172_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(173); @@ -1650,7 +1650,7 @@ void var_172_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_173_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(174); @@ -1659,7 +1659,7 @@ void var_173_node(void *t1, size_t bytes_t1) { } void var_174_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(175); @@ -1668,7 +1668,7 @@ void var_174_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(176); @@ -1679,7 +1679,7 @@ void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_176_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(177); @@ -1688,7 +1688,7 @@ void var_176_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_177_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(178); @@ -1697,7 +1697,7 @@ void var_177_node(void *t1, size_t bytes_t1) { } void var_178_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(179); @@ -1706,7 +1706,7 @@ void var_178_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(180); @@ -1717,7 +1717,7 @@ void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_180_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(181); @@ -1726,7 +1726,7 @@ void var_180_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_181_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(182); @@ -1735,7 +1735,7 @@ void var_181_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_182_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(183); @@ -1744,7 +1744,7 @@ void 
var_182_node(void *t1, size_t bytes_t1) { } void var_183_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(184); @@ -1753,7 +1753,7 @@ void var_183_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(185); @@ -1764,7 +1764,7 @@ void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_185_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(186); @@ -1773,7 +1773,7 @@ void var_185_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_186_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(187); @@ -1782,7 +1782,7 @@ void var_186_node(void *t1, size_t bytes_t1) { } void var_187_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(188); @@ -1791,7 +1791,7 @@ void var_187_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(189); @@ -1802,7 +1802,7 @@ void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_189_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, 
size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(190); @@ -1811,7 +1811,7 @@ void var_189_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_190_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(191); @@ -1820,7 +1820,7 @@ void var_190_node(void *t1, size_t bytes_t1) { } void var_191_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(192); @@ -1829,7 +1829,7 @@ void var_191_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(193); @@ -1840,7 +1840,7 @@ void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_193_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(194); @@ -1849,7 +1849,7 @@ void var_193_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_194_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(195); @@ -1858,7 +1858,7 @@ void var_194_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); 
__hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(196); @@ -1869,7 +1869,7 @@ void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_196_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(197); @@ -1878,7 +1878,7 @@ void var_196_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_197_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(198); @@ -1887,7 +1887,7 @@ void var_197_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_198_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(199); @@ -1896,7 +1896,7 @@ void var_198_node(void *t1, size_t bytes_t1) { } void var_199_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(200); @@ -1905,7 +1905,7 @@ void var_199_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(201); @@ -1916,7 +1916,7 @@ void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_201_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(202); @@ 
-1925,7 +1925,7 @@ void var_201_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_202_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(203); @@ -1934,7 +1934,7 @@ void var_202_node(void *t1, size_t bytes_t1) { } void var_203_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(204); @@ -1943,7 +1943,7 @@ void var_203_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(205); @@ -1954,7 +1954,7 @@ void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_205_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(206); @@ -1963,7 +1963,7 @@ void var_205_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_206_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(207); @@ -1972,7 +1972,7 @@ void var_206_node(void *t1, size_t bytes_t1) { } void var_207_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(208); @@ -1981,7 +1981,7 @@ void var_207_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(209); @@ -1992,7 +1992,7 @@ void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_209_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(210); @@ -2001,7 +2001,7 @@ void var_209_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_210_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(211); @@ -2010,7 +2010,7 @@ void var_210_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_211_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(212); @@ -2019,7 +2019,7 @@ void var_211_node(void *t1, size_t bytes_t1) { } void var_212_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(213); @@ -2028,7 +2028,7 @@ void var_212_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(214); @@ -2039,7 +2039,7 @@ void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_214_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, 
t4, t5, 0); __hpvm__node_id(215); @@ -2048,7 +2048,7 @@ void var_214_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_215_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(216); @@ -2057,7 +2057,7 @@ void var_215_node(void *t1, size_t bytes_t1) { } void var_216_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(217); @@ -2066,7 +2066,7 @@ void var_216_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(218); @@ -2077,7 +2077,7 @@ void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_218_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(219); @@ -2086,7 +2086,7 @@ void var_218_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_219_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(220); @@ -2095,7 +2095,7 @@ void var_219_node(void *t1, size_t bytes_t1) { } void var_220_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(221); @@ -2104,7 +2104,7 @@ void var_220_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(222); @@ -2115,7 +2115,7 @@ void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { void var_222_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3, void *t4, size_t bytes_t4, void *t5, size_t bytes_t5) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(5, t1, t2, t3, t4, t5, 0); __hpvm__node_id(223); @@ -2124,7 +2124,7 @@ void var_222_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, } void var_223_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(224); @@ -2133,7 +2133,7 @@ void var_223_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_224_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(225); @@ -2142,7 +2142,7 @@ void var_224_node(void *t1, size_t bytes_t1) { } void var_225_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(226); @@ -2151,7 +2151,7 @@ void var_225_node(void *t1, size_t bytes_t1) { } void var_226_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(227); @@ -2160,7 +2160,7 @@ void var_226_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_227_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); __hpvm__node_id(228); @@ -2169,7 +2169,7 @@ void var_227_node(void *t1, size_t bytes_t1, void *t2, 
size_t bytes_t2) { } void var_228_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); __hpvm__node_id(229); @@ -5134,7 +5134,8 @@ typedef struct __attribute__((__packed__)) { int main() { - std::string dir_prefix = std::string("/home/hsharif3/resnet50_imagenet/"); + std::string dir_prefix = + std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -6979,7 +6980,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..19f5523523f3b9fc7b8f81c69112630003d5597e --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/quant_ranges_rt.txt @@ -0,0 +1,15 @@ +1 -1.8816367 2.0934217 -0.53275156 0.49437004 -0.6403629 0.2490165 0.0 1.35908746719 +2 0.0 1.35908746719 -0.2688396 0.20639156 -0.7745511 0.82006615 0.0 2.52123117924 +3 0.0 2.52123117924 -0.16776876 0.14878987 -0.35283303 0.5154362 0.0 1.20119857848 +4 0.0 1.20119857848 -0.088948585 0.114222586 -0.30250227 0.36856708 0.0 1.03598809302 +5 0.0 1.03598809302 -0.07739562 0.10973293 -0.15568458 0.17634983 0.0 0.300495595038 +6 0.0 0.300495595038 -0.051649556 0.05435231 -0.07395447 0.07996062 0.0 0.11490475405 +7 0.0 0.11490475405 -0.043513633 0.07577866 -0.06921874 0.02660573 0.0 0.16232508488 +8 0.0 0.16232508488 -0.033842053 0.045218028 -0.022827804 0.023845317 0.0 0.124249965735 +9 0.0 
0.124249965735 -0.02211613 0.032084666 -0.02699063 0.03773564 0.0 0.174634486511 +10 0.0 0.174634486511 -0.01979376 0.034854397 -0.036107242 0.07056531 0.0 0.575175762177 +11 0.0 0.575175762177 -0.03452098 0.046055835 -0.051925894 0.07039055 0.0 0.771875114441 +12 0.0 0.771875114441 -0.025946895 0.040090334 -0.06049362 0.12658806 0.0 1.17285169065 +13 0.0 1.17285169065 -0.021766115 0.03315237 -0.20705001 0.117947325 0.0 2.00157693863 +14 0.0 2.00157693863 -0.042597745 0.046707444 -0.21937433 0.2545502 0.0 2.00236111879 +15 0.0 2.00236111879 -0.32550547 0.30829763 -1.1787822 1.2378151 -18.2514705467 24.1736344528 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9a6612a5df150f58c69e1a7faeaf83ed5c7d605 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/data/tuner_confs.txt @@ -0,0 +1,38 @@ ++++++ +conf1 1 0 90.19 0 +1 gpu conv fp32 1 add fp32 1 relu fp32 1 +2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 relu fp32 1 +4 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 relu fp32 1 +6 gpu conv fp32 1 add fp32 1 relu fp32 1 +7 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 1 add fp32 1 relu fp32 1 +9 gpu conv fp32 1 add fp32 1 relu fp32 1 +10 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 1 add fp32 1 relu fp32 1 +12 gpu conv fp32 1 add fp32 1 relu fp32 1 +13 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 1 add fp32 1 relu fp32 1 +15 gpu mul fp32 1 add fp32 1 +16 gpu softmax fp32 1 +----- ++++++ +conf2 1.5 0 90.19 0 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 
add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp index 03c647d74244cf72adce8ddc11d361904f1a5da8..f1533c75b4b838f5b86dfbf915cfd359b9682636 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp @@ -828,9 +828,10 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); @@ -1000,7 +1001,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); computeAccuracy3(labels, result); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp index 21e263e098bfe9d3fdade2774461ec8705a32a04..059bff6d22a51853090700072d4cf3915ed5f796 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp +++ 
b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void 
var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) { } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_16_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = 
__hpvm__tensor_relu(t1); @@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) { } void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_22_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) { } void var_23_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) { } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_26_node(void *t1, size_t bytes_t1) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) { } void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_29_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_32_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t 
bytes_t1) { } void var_33_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) { } void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_36_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) { } void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_39_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void 
*r = __hpvm__tensor_relu(t1); @@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) { } void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_42_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) { } void var_43_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) { } void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_46_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) { } void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -828,9 +828,10 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); @@ -1012,7 +1013,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/quant_ranges_rt.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e78e0a2bf8517734a4f42200b411829b5e39877 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/quant_ranges_rt.txt @@ -0,0 +1,15 @@ +1 -1.7829767 1.9456929 -0.7450515 0.71249133 -1.5885142 0.275554 0.0 8.190712 +2 0.0 8.190712 -0.30790088 0.43504623 
-1.4242363 1.2602744 0.0 19.023172 +3 0.0 19.023172 -0.29189092 0.26958522 -1.0527138 0.9075671 0.0 14.428051 +4 0.0 14.428051 -0.15521508 0.1829038 -0.845419 1.9358484 0.0 23.065294 +5 0.0 23.065294 -0.13149762 0.14811686 -0.7162557 1.0370971 0.0 15.165984 +6 0.0 15.165984 -0.06236292 0.08321518 -0.9067523 0.9922458 0.0 13.664733 +7 0.0 13.664733 -0.06471479 0.1024472 -0.15943134 0.7988499 0.0 19.025272 +8 0.0 19.025272 -0.06320205 0.08291938 -0.32540628 0.5203079 0.0 6.727217 +9 0.0 6.727217 -0.037707984 0.051601283 -0.25622904 0.11251946 0.0 3.2003012 +10 0.0 3.2003012 -0.056007143 0.09549151 -0.11591503 0.06267536 0.0 4.321189 +11 0.0 4.321189 -0.060094673 0.10868926 -0.105962686 0.09584572 0.0 2.936297 +12 0.0 2.936297 -0.034618977 0.05792674 -0.4237576 0.11035452 0.0 4.87262 +13 0.0 4.87262 -0.035480656 0.058295887 -0.21477045 0.14263579 0.0 10.32133 +14 0.0 10.32133 -0.08929961 0.11301676 -0.20798548 0.47405547 0.0 13.91 +15 0.0 13.91 -0.6627122 0.35539475 -1.0631907 0.9830786 -70.45701 87.34367 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..2662b4ba78dc54686d61f45242fb38f4ca75402c --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/data/tuner_confs.txt @@ -0,0 +1,39 @@ +2000 ++++++ +conf1 1 0 90.19 0 +1 gpu conv fp32 1 add fp32 1 relu fp32 1 +2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 relu fp32 1 +4 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 relu fp32 1 +6 gpu conv fp32 1 add fp32 1 relu fp32 1 +7 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 1 add fp32 1 relu fp32 1 +9 gpu conv fp32 1 add fp32 1 relu fp32 1 +10 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 1 add fp32 1 relu fp32 1 +12 gpu conv fp32 1 add fp32 1 relu fp32 1 
+13 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 1 add fp32 1 relu fp32 1 +15 gpu mul fp32 1 add fp32 1 +16 gpu softmax fp32 1 +----- ++++++ +conf2 1.5 0 90.19 0 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp index dcbf59d3fd8a88fd2ef700de6a7a62b64c543b29..41fe9ae0f34c5c5086f8c16491f5035d5a382702 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp @@ -828,10 +828,11 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); @@ -993,8 +994,7 @@ int main() { void *dfg = __hpvm__launch(0, root, (void *)args); __hpvm__wait(dfg); - - void *result = static_cast<RootIn 
*>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp index e2820d8c04990ef6aeac13fd9b63a7bba97a28ef..3a853d3a0f5399057164594951a884222a02e105 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = 
__hpvm__tensor_add(t1, t2); @@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) { } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_16_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_18_node(void *t1, 
size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) { } void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_22_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) { } void var_23_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) { } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -211,7 +211,7 @@ void 
var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_26_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) { } void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_29_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + 
__hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_32_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) { } void var_33_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) { } void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_36_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) { } void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_38_node(void *t1, 
size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_39_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) { } void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_42_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) { } void var_43_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) { } void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t 
bytes_t1, void *t2, size_t bytes_t2) { } void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_46_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) { } void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -828,10 +828,11 @@ typedef struct __attribute__((__packed__)) { } RootIn; int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); @@ -1013,7 +1014,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); diff --git 
a/hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/quant_ranges2.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/data/quant_ranges_rt.txt similarity index 100% rename from hpvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_imagenet/quant_ranges2.txt rename to hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/data/quant_ranges_rt.txt diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/data/tuner_confs.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf93cd1286cb6f1358a46cde5991d19ab451c78a --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/data/tuner_confs.txt @@ -0,0 +1,21 @@ +19194.623482 ++++++ +conf1 1 1 72.84 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 relu fp32 1 +4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 11 add fp32 1 relu fp32 1 +6 gpu conv fp32 11 add fp32 1 relu fp32 1 +7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 11 add fp32 1 relu fp32 1 +9 gpu conv fp32 11 add fp32 1 relu fp32 1 +10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 11 add fp32 1 relu fp32 1 +12 gpu conv fp32 11 add fp32 1 relu fp32 1 +13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 11 add fp32 1 relu fp32 1 +15 gpu mul fp32 11 add fp32 1 relu fp32 1 +16 gpu mul fp32 11 add fp32 1 +17 gpu softmax fp32 1 +----- diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp index 27db61ee7285432aa22d908679559f8bd8166d6c..f269aa9091521809751cd2214a46d039379c0114 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp @@ -877,7 +877,7 @@ typedef 
struct __attribute__((__packed__)) { int main() { - std::string dir_prefix = std::string("/home/hsharif3/vgg16_imagenet/"); + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -1053,7 +1053,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); __hpvm__cleanup(); diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp index 0740cf0f3a06d227f86c66b45eb22b4fd5485292..2bd129300adc5ffb609df1e46c951630d682b883 100644 --- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp +++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp @@ -11,7 +11,7 @@ #include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_2_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) { } void var_3_node(void *t1, 
size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_5_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) { } void var_6_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) { } void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_9_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -91,7 +91,7 @@ void 
var_9_node(void *t1, size_t bytes_t1) { } void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_12_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) { } void var_13_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) { } void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_16_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); 
__hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) { } void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_19_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) { } void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_22_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) { } void var_23_node(void *t1, size_t bytes_t1) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) { } void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_26_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) { } void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_29_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -251,7 +251,7 @@ void 
var_29_node(void *t1, size_t bytes_t1) { } void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_32_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) { } void var_33_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) { } void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_36_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); 
__hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) { } void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_39_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) { } void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1); @@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_42_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) { } void var_43_node(void *t1, size_t bytes_t1) { - 
__hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); @@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) { } void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_46_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) { } void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -403,7 +403,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_49_node(void *t1, size_t bytes_t1) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(1, t1, 0); void *r = __hpvm__tensor_relu(t1); @@ -411,7 +411,7 @@ void var_49_node(void *t1, size_t bytes_t1) { } 
void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_mul(t1, t2); @@ -419,7 +419,7 @@ void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { } void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { - __hpvm__hint(hpvm::PROMISE_TARGET); + __hpvm__hint(hpvm::TENSOR_TARGET); __hpvm__attributes(2, t1, t2, 0); void *r = __hpvm__tensor_add(t1, t2); @@ -877,7 +877,8 @@ typedef struct __attribute__((__packed__)) { int main() { - std::string dir_prefix = std::string("/home/hsharif3/vgg16_imagenet_tune/"); + std::string dir_prefix = + std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -1068,7 +1069,7 @@ int main() { __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->input; + void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);