diff --git a/llvm/projects/gpu_profiler/offline_profiler.cpp b/llvm/projects/gpu_profiler/offline_profiler.cpp index 25ca45241c29e7a0f8edb0518d8347a185caf5a4..6b9f37ef62cc2c8600d11474100f27873bc36d7a 100644 --- a/llvm/projects/gpu_profiler/offline_profiler.cpp +++ b/llvm/projects/gpu_profiler/offline_profiler.cpp @@ -201,11 +201,11 @@ private: } // Executes the program to be profiled - void runProgram(const char * const program) { + void runProgram(const std::string& program) { // Tell the profiling thread to start, execute the program that needs // to be profiled, and then tell the profiling thread to stop. start_ = true; - const auto result = std::system(program); + const auto result = std::system(program.c_str()); stop_ = true; } @@ -471,7 +471,7 @@ public: sys_stream_.close(); } - void profile(const char * const program, const int iterations) { + void profile(const std::string& program, const int iterations) { iterations_ = iterations; resetGlobal(); @@ -499,6 +499,7 @@ public: } void dumpTensorInfo(const char * const filename) const { + std::cout<<"dumping to"<<filename<<'\n'; const std::string header = "Op,Time (ms),Energy (mJ),GPU Energy (mJ),DDR Energy (mJ),Power (mW),GPU Power (mW),DDR Power (mW),Time std,Energy std,GPU Energy std,DDR Energy std,Power std,GPU Power std,DDR Power std\n"; std::ofstream ofs; ofs.open(filename); @@ -568,16 +569,26 @@ public: int main(int argc, char *argv[]) { if (argc < NUM_ARGS) { - std::cout << "Usage: " << argv[0] << " <program> <iterations> <tensor output file> [power output file]\n"; + std::cout << "Usage: " << argv[0] << " <program> <params> END_PARAM <iterations> <tensor output file> [power output file]\n"; exit(1); } + std::string program(argv[1]); + size_t i = 2; + for (; i < argc; i++){ + if (std::string(argv[i]) == "END_PARAM"){ + break; + } + program += " " + std::string(argv[i]); + } + i += 1; + Profiler pp; - pp.profile(argv[1], std::stoi(argv[2])); - pp.dumpTensorInfo(argv[3]); + pp.profile(program, std::stoi(argv[i])); + pp.dumpTensorInfo(argv[i + 1]); if (argc > NUM_ARGS) - pp.dumpPowerReadings(argv[4]); + pp.dumpPowerReadings(argv[i + 2]); return 0; } diff --git a/llvm/projects/hpvm-tensor-rt/table_fixer.py b/llvm/projects/hpvm-tensor-rt/table_fixer.py new file mode 100644 index 0000000000000000000000000000000000000000..3095d15aba0757aca3b74705ba57b5e189b5cecb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/table_fixer.py @@ -0,0 +1,72 @@ +# Fixes table format +# Remove all instances of cifar10 --> each col should start with fp16 or fp32 +# Combine multiple tables + +def fix_columns(table_name, new_filename): + table_file = open(table_name, "r") + + new_table_file = [] + + for line in table_file: + line = line.strip() + if line.startswith("**"): + col_names = line.split() + new_col_names = [] + + for col_name in col_names: + if col_name.find("fp16") != -1: + new_col_names.append(col_name[col_name.find("fp16") : ]) + elif col_name.find("fp32") != -1: + new_col_names.append(col_name[col_name.find("fp32") : ]) + else: + new_col_names.append(col_name) + new_table_file.append(' '.join(new_col_names)) + else: + new_table_file.append(line) + table_file.close() + table_file_new = open(new_filename, "w") + table_file_new.write('\n'.join(new_table_file)) + table_file_new.close() + +def combine_tables(table1, table2, new_filename): + table1_file = open(table1, "r") + table2_file = open(table2, "r") + + table1_data = table1_file.read().strip().split('\n') + table2_data = table2_file.read().strip().split('\n') + new_contents = [] + + table2_ind 
= 0 + for table1_line in table1_data: + table2_line = table2_data[table2_ind] + + if table1_line.startswith("**"): + assert table2_line.startswith("**") + table2_lst = table2_line.strip().split() + table2_cols = ' '.join(table2_lst[3 : ]) + new_contents.append(table1_line + ' ' + table2_cols) + else: + table2_lst = table2_line.strip().split() + table2_cols = ' '.join(table2_lst[1 : ]) + new_contents.append(table1_line + ' ' + table2_cols) + table2_ind += 1 + + table1_file.close() + table2_file.close() + + new_file = open(new_filename, "w") + new_file.write('\n'.join(new_contents)) + new_file.close() + +import sys + +if __name__ == "__main__": + num_args = len(sys.argv) + + if num_args != 4 and num_args != 5: + print("python table_fixer.py <fix> <filename> OR <combine> <table1> <table2> <new name>") + exit(1) + elif sys.argv[1] == "fix": + fix_columns(sys.argv[2], sys.argv[3]) + elif sys.argv[1] == "combine": + combine_tables(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h index c1692fc1cbe4c11b3cca8da3e60fa265edf3b2b3..e2905db99ae142b013215dcf90e0e0cbb9c5f70e 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h @@ -213,10 +213,10 @@ void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, hostToDeviceCopy(input); hostToDeviceCopy(filter); - + profileEvent("H2F_start"); convertToFP32(input); convertToFP32(filter); - + profileEvent("H2F_end"); int n, c, h, w; // output dimensions n = input->dims.dim_sizes[0]; @@ -381,7 +381,7 @@ void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, } - profileEvent("Conv_end", true); + profileEvent("Conv_end"); //, true); return new_output; @@ -595,6 +595,7 @@ void* tensorConvPerfCudaHalf(void* input_ptr, void* filter_ptr, int row, int col, int start){ INFO("*** TensorConvolution half perforation \n"); + profileEvent("#Conv"); Tensor* input = (Tensor*)input_ptr; Tensor* filter = (Tensor*)filter_ptr; @@ -603,19 +604,14 @@ void* tensorConvPerfCudaHalf(void* input_ptr, void* filter_ptr, conv_groups = 1; } - profileEvent("F2H_start"); - hostToDeviceCopy(input); hostToDeviceCopy(filter); + profileEvent("F2H_start"); convertToFP16(input); convertToFP16(filter); - - /******* END OF INPUT DATA CONVERSIONS*/ profileEvent("F2H_end"); - profileEvent("Conv"); - Tensor* output_half; int n, c, h, w; // output dimensions n = input->dims.dim_sizes[0]; @@ -811,15 +807,14 @@ void* tensorConvPerfCudaHalf(void* input_ptr, void* filter_ptr, freeTensor(output_half); } - profileEvent("Conv_end", true); + //profileEvent("Conv_end", true); profileEvent("H2F_start"); - convertToFP32_offline(new_output); - profileEvent("H2F_end"); - + profileEvent("#Conv_end"); //, true); + return new_output; } @@ -925,6 +920,8 @@ void* tensorConvInputHalf(void* input_ptr, void* filter_ptr, int skip_every, int skip_offset){ INFO("*** TensorHConvolution input sampling \n"); + profileEvent("#Conv"); + Tensor* input = (Tensor*)input_ptr; Tensor* filter = (Tensor*)filter_ptr; //FIXME: Current hack to preserve backward compatibilty @@ -932,19 +929,14 @@ void* tensorConvInputHalf(void* input_ptr, void* filter_ptr, conv_groups = 1; } - profileEvent("F2H_start"); - hostToDeviceCopy(input); hostToDeviceCopy(filter); + profileEvent("F2H_start"); convertToFP16(input); convertToFP16(filter); - - /******* END OF INPUT DATA CONVERSIONS*/ 
profileEvent("F2H_end"); - profileEvent("Conv"); - Tensor* output; Tensor* new_output; // TODO: Support other cases; @@ -1052,7 +1044,6 @@ void* tensorConvInputHalf(void* input_ptr, void* filter_ptr, cudaFree(convData); cudaFree(reducedFilter); freeTensor(output); - profileEvent("Conv_end", true); profileEvent("H2F_start"); @@ -1061,6 +1052,8 @@ void* tensorConvInputHalf(void* input_ptr, void* filter_ptr, profileEvent("H2F_end"); + profileEvent("#Conv_end", true); + return new_output; } @@ -1182,6 +1175,7 @@ void* tensorConvApprox(void* input_ptr, void* filter_ptr, int row, int col, int skip_every, int offset){ INFO("*** TensorConvolution approximation \n"); + profileEvent("Conv"); Tensor* input = (Tensor*)input_ptr; Tensor* filter = (Tensor*)filter_ptr; @@ -1193,11 +1187,10 @@ void* tensorConvApprox(void* input_ptr, void* filter_ptr, hostToDeviceCopy(input); hostToDeviceCopy(filter); + //profileEvent("H2F_start"); convertToFP32(input); convertToFP32(filter); - - - profileEvent("Conv"); + //profileEvent("H2F_end"); int n, c, h, w; // output dimensions n = input->dims.dim_sizes[0]; @@ -1419,7 +1412,7 @@ void* tensorConvApprox(void* input_ptr, void* filter_ptr, freeTensor(output); } - profileEvent("Conv_end", true); + profileEvent("Conv_end"); return new_output; @@ -1431,6 +1424,7 @@ void* tensorConvApproxHalf(void* input_ptr, void* filter_ptr, int row, int col, int skip_every, int offset){ INFO("*** TensorConvolution half approximation \n"); + profileEvent("#Conv"); Tensor* input = (Tensor*)input_ptr; Tensor* filter = (Tensor*)filter_ptr; @@ -1439,19 +1433,14 @@ void* tensorConvApproxHalf(void* input_ptr, void* filter_ptr, conv_groups = 1; } - profileEvent("F2H_start"); - hostToDeviceCopy(input); hostToDeviceCopy(filter); + profileEvent("F2H_start"); convertToFP16(input); convertToFP16(filter); - - /******* END OF INPUT DATA CONVERSIONS*/ profileEvent("F2H_end"); - profileEvent("Conv"); - int n, c, h, w; // output dimensions n = input->dims.dim_sizes[0]; c = filter->dims.dim_sizes[0]; //number of filters @@ -1674,14 +1663,11 @@ void* tensorConvApproxHalf(void* input_ptr, void* filter_ptr, } - profileEvent("Conv_end", true); - profileEvent("H2F_start"); - convertToFP32_offline(new_output); - profileEvent("H2F_end"); + profileEvent("#Conv_end"); return new_output; } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h index c166a25db39a2a03735c7a785c8fb89b80d85613..5e282d130f2a261ba76ebefa2b92af38682a7def 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h @@ -121,32 +121,42 @@ void* handleTensorConvApproximationTuples( { void* t_out; RC->resume_profiler(); - t_out = tensorConvPerfCuda(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 1, - 1, 1, 0); + t_out = tensorConvApprox(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 1, + 1, 1, 1, 1); +// t_out = tensorConvPerfCuda(input, filter, +// conv_pad_h, conv_pad_w, +// conv_stride_h, conv_stride_w, +// 1, 1, +// 1, 1, 0); RC->pause_profiler(); std::pair<double, double> pinfo = RC->get_time_energy(); RC->reset_profiler(); - RC->addToCurrentIterationComputeTime("tensorConvPerfCuda", pinfo.first); - RC->addToCurrentIterationComputeEnergy("tensorConvPerfCuda", pinfo.second); + RC->addToCurrentIterationComputeTime("tensorConvApprox", pinfo.first); + 
RC->addToCurrentIterationComputeEnergy("tensorConvApprox", pinfo.second); return t_out; } case GPUNodeConfiguration::APPROX::FP16 : { void* t_out; RC->resume_profiler(); - t_out = tensorConvPerfCudaHalf(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 1, - 1, 1, 0); + t_out = tensorConvApproxHalf(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 1, + 1, 1, 1, 1); +// t_out = tensorConvPerfCudaHalf(input, filter, +// conv_pad_h, conv_pad_w, +// conv_stride_h, conv_stride_w, +// 1, 1, +// 1, 1, 0); RC->pause_profiler(); std::pair<double, double> pinfo = RC->get_time_energy(); RC->reset_profiler(); - RC->addToCurrentIterationComputeTime("tensorConvPerfCudaHalf", pinfo.first); - RC->addToCurrentIterationComputeEnergy("tensorConvPerfCudaHalf", pinfo.second); + RC->addToCurrentIterationComputeTime("tensorConvApproxHalf", pinfo.first); + RC->addToCurrentIterationComputeEnergy("tensorConvApproxHalf", pinfo.second); return t_out; } case GPUNodeConfiguration::APPROX::PERFORATION : @@ -190,16 +200,21 @@ void* handleTensorConvApproximationTuples( } void* t_out; RC->resume_profiler(); - t_out = tensorConvPerfCudaHalf(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 1, - row, col, offset); + t_out = tensorConvApproxHalf(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 1, + row, col, 1, offset); +// t_out = tensorConvPerfCudaHalf(input, filter, +// conv_pad_h, conv_pad_w, +// conv_stride_h, conv_stride_w, +// 1, 1, +// row, col, offset); RC->pause_profiler(); std::pair<double, double> pinfo = RC->get_time_energy(); RC->reset_profiler(); - RC->addToCurrentIterationComputeTime("tensorConvPerfCudaHalf", pinfo.first); - RC->addToCurrentIterationComputeEnergy("tensorConvPerfCudaHalf", pinfo.second); + RC->addToCurrentIterationComputeTime("tensorConvApproxHalf(_perf)", pinfo.first); + RC->addToCurrentIterationComputeEnergy("tensorConvApproxHalf(_perf)", pinfo.second); return t_out; } case GPUNodeConfiguration::APPROX::INPUT_SAMPLING : @@ -231,19 +246,23 @@ void* handleTensorConvApproximationTuples( } void* t_out; RC->resume_profiler(); + t_out = tensorConvApproxHalf(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 1, + 1, 1, skip_rate, skip_rate - 1/*offset*/); //FIXME + RC->pause_profiler(); + std::pair<double, double> pinfo = RC->get_time_energy(); + RC->reset_profiler(); + RC->addToCurrentIterationComputeTime("tensorConvApproxHalf(_samp)", pinfo.first); + RC->addToCurrentIterationComputeEnergy("tensorConvApproxHalf(_samp)", pinfo.second); + // Overwrite the result writen in t_out with the simulation result, + // to propagate that to the next layer. 
+ // TODO: Remove this call when bug is fixed in // t_out = tensorConvSampSim(input, filter, // conv_pad_h, conv_pad_w, // conv_stride_h, conv_stride_w, // 1, 1, skip_rate, offset); - t_out = tensorConvInputHalf(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 1, skip_rate, offset); - RC->pause_profiler(); - std::pair<double, double> pinfo = RC->get_time_energy(); - RC->reset_profiler(); - RC->addToCurrentIterationComputeTime("tensorConvInputHalf", pinfo.first); - RC->addToCurrentIterationComputeEnergy("tensorConvInputHalf", pinfo.second); return t_out; } default : diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h index 9e143f7f796d53d92a2432d97afce308d76413ce..99c465434a2879d85624b7ff6bb4141dd8fe4634 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h @@ -218,85 +218,85 @@ enum SEARCH_KIND void PROMISENodeConfiguration::print() { - INFO(" promise"); + printf(" promise"); for (auto &it : ApproxChoices) { - DEBUG(" "); + printf(" "); switch (it.first) { case APPROX::SWING_LEVEL : - DEBUG("swing_level"); + printf("swing_level"); break; default: ERROR("Unknown approximation option"); break; // TODO additional approx methods to be printed here } - DEBUG(" %d", it.second); + printf(" %d", it.second); } - DEBUG("\n"); + printf("\n"); } void GPUNodeConfiguration::print() { - DEBUG(" gpu"); + printf(" gpu"); for (auto &it : ApproxChoices) { - DEBUG(" "); + printf(" "); switch (it.first) { case TENSOR_OP::ADD : - DEBUG("add"); + printf("add"); break; case TENSOR_OP::BATCHNORM : - DEBUG ("batchnorm"); + printf("batchnorm"); break; case TENSOR_OP::CONV : - DEBUG("conv"); + printf("conv"); break; case TENSOR_OP::GROUP_CONV : - DEBUG("group_conv"); + printf("group_conv"); break; case TENSOR_OP::MUL : - DEBUG("mul"); + printf("mul"); break; case TENSOR_OP::RELU : - DEBUG("relu"); + printf("relu"); break; case TENSOR_OP::CLIPPED_RELU : - DEBUG("clipped_relu"); + printf("clipped_relu"); break; case TENSOR_OP::TANH : - DEBUG ("tanh"); + printf("tanh"); break; case TENSOR_OP::POOL_MAX : - DEBUG("pool_max"); + printf("pool_max"); break; case TENSOR_OP::POOL_MEAN : - DEBUG("pool_mean"); + printf("pool_mean"); break; case TENSOR_OP::POOL_MIN : - DEBUG("pool_min"); + printf("pool_min"); break; case TENSOR_OP::SOFTMAX : - DEBUG("softmax"); + printf("softmax"); break; case TENSOR_OP::FFT : - DEBUG("fft"); + printf("fft"); break; case TENSOR_OP::REDUCE : - DEBUG("reduce"); + printf("reduce"); break; case TENSOR_OP::PROJECTIVE_T : - DEBUG("projectiveT"); + printf("projectiveT"); break; case TENSOR_OP::MAP1 : - DEBUG("map1"); + printf("map1"); break; case TENSOR_OP::MAP2 : - DEBUG("map2"); + printf("map2"); break; case TENSOR_OP::MAP3 : - DEBUG("map3"); + printf("map3"); break; default : ERROR("Unknown tensor operation."); @@ -306,22 +306,22 @@ void GPUNodeConfiguration::print() { auto &approxVec = it.second; for (auto &inner_it : approxVec) { - DEBUG(" "); + printf(" "); switch (inner_it.first) { case APPROX::FP32 : - DEBUG("fp32"); + printf("fp32"); break; case APPROX::FP16 : - DEBUG("fp16"); + printf("fp16"); break; case APPROX::PERFORATION : - DEBUG("perf"); + printf("perf"); break; case APPROX::INPUT_SAMPLING : - DEBUG("samp"); + printf("samp"); break; case APPROX::REDUCTION_SAMPLING : - DEBUG("red_samp"); + printf("red_samp"); break; default: ERROR("Unknown approximation option"); 
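[Editor's note] A minimal sketch — not part of the patch — of how the rewritten `handleTensorConvApproximationTuples` cases above map each approximation choice onto the trailing `(row, col, skip_every, offset)` arguments of the unified `tensorConvApprox` / `tensorConvApproxHalf` calls. The dict keys in `knobs` are hypothetical labels for the values parsed from the node configuration.

```python
# Editor's sketch: knob-to-argument mapping used by the conv handlers above.
# Keys in `knobs` are hypothetical names for parsed configuration values.
def conv_approx_args(approx, knobs):
    if approx == "perf":
        # PERFORATION: perforate rows/cols; sampling disabled (skip_every = 1)
        return (knobs["row"], knobs["col"], 1, knobs["offset"])
    if approx == "samp":
        # INPUT_SAMPLING: perforation disabled; the patch hard-codes
        # offset = skip_rate - 1 (marked FIXME above)
        return (1, 1, knobs["skip_rate"], knobs["skip_rate"] - 1)
    # FP32 / FP16 baselines: all knobs neutral
    return (1, 1, 1, 1)
```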
@@ -329,27 +329,27 @@ void GPUNodeConfiguration::print() { // TODO additional approx methods to be printed here } - DEBUG(" %d", inner_it.second); + printf(" %d", inner_it.second); } } - DEBUG("\n"); + printf("\n"); } void Configuration::print() { - DEBUG("+++++\n"); - DEBUG("%s %f %f %f %f\n", name.c_str(), speedup, energy, accuracy, accuracyLoss); + printf("+++++\n"); + printf("%s %f %f %f %f\n", name.c_str(), speedup, energy, accuracy, accuracyLoss); for (std::map<std::string, NodeConfiguration* >::const_iterator it = setup.begin(); it != setup.end(); ++it) { - DEBUG("%s :", it->first.c_str()); + printf("%s :", it->first.c_str()); it->second->print(); } - DEBUG("-----\n"); + printf("-----\n"); } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/hpvm-rt-controller.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/hpvm-rt-controller.h index 13080c9ed334c1edf8b1539a8c8d71090e88cd3d..af107016a5b8009fef0622487a285ac38a437ad4 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/hpvm-rt-controller.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/hpvm-rt-controller.h @@ -259,6 +259,41 @@ class ProfileInfo { }; +class Slowdowns { + private: + std::vector<float> slowdowns; + unsigned idx; + + public: + Slowdowns() { + idx = 0; + + std::ifstream s_in("slowdowns.txt"); + if (!s_in) { + DEBUG("slowdowns file not found. Initializing slowdowns randomly.\n"); + for (unsigned i = 0; i < 10; i++) { + slowdowns.push_back( 1.0 + (rand()/(RAND_MAX/(5.0-1.0))) ); + } + } else { + for (std::string line; std::getline(s_in, line); ) { + float s = std::stof(line); + slowdowns.push_back(s); + } + } + } + + unsigned getSlowdownsNumber() { + return slowdowns.size(); + } + + float getNextSlowdown() { + float tmp = slowdowns[idx]; + idx = (idx + 1) % slowdowns.size(); + return tmp; + } + +}; + class RuntimeController; RuntimeController *RC; @@ -287,6 +322,7 @@ class RuntimeController { unsigned configurationIdx = 0; double baseline_time = 0.0; // Execution time of baseline configuration + Slowdowns *slowdowns; /*** Objects used to gather timing and energy information for execution ***/ ProfileInfo *PI; @@ -347,6 +383,7 @@ class RuntimeController { void findTargetConfiguration(float, enum SEARCH_KIND); float getGoalSpeedup(); double getBaselineTime(); + Slowdowns *getSlowdowns(); void init(const char *Cstr, const char *Qstr) { // We initialize the path to the profile info output file, @@ -357,17 +394,19 @@ class RuntimeController { readConfigurationFile(Cstr); Configurations = NULL; computeParetoConfigurationPoints(); - compute3DParetoConfigurationPoints(); +// compute3DParetoConfigurationPoints(); Not using 3D curve INFO("Speedup Configurations\n"); printConfigurations(SpeedupConfigurations); - INFO("Energy Configurations\n"); - printConfigurations(EnergyConfigurations); - INFO("3D Configurations\n"); - printConfigurations(ThreeDCurveConfigurations); +// INFO("Energy Configurations\n"); +// printConfigurations(EnergyConfigurations); +// INFO("3D Configurations\n"); +// printConfigurations(ThreeDCurveConfigurations); configurationIdx = 0; //TODO: initialize using pareto curve - findTargetConfiguration ? 
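[Editor's note] The new `Slowdowns` helper above reads one slowdown factor per line from `slowdowns.txt`, falls back to ten random factors in [1.0, 5.0] when the file is missing, and `getNextSlowdown()` cycles through them round-robin. A minimal Python rendering of that contract (editor's sketch, not part of the runtime):

```python
import random

# Editor's sketch of the Slowdowns contract: one float per line in
# slowdowns.txt; if the file is absent, ten random factors in [1.0, 5.0],
# matching the C++ fallback 1.0 + rand()/(RAND_MAX/(5.0-1.0)).
class Slowdowns:
    def __init__(self, path="slowdowns.txt"):
        try:
            with open(path) as f:
                self.values = [float(line) for line in f if line.strip()]
        except OSError:
            self.values = [random.uniform(1.0, 5.0) for _ in range(10)]
        self.idx = 0

    def next_slowdown(self):
        # Cycles round-robin, like getNextSlowdown() above.
        v = self.values[self.idx]
        self.idx = (self.idx + 1) % len(self.values)
        return v
```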
Configurations = &SpeedupConfigurations; + // Initializations for different runtime control strategies srand(static_cast <unsigned> (time(0))); + slowdowns = new Slowdowns(); // Start profiling thread in the background, ready to time start_profiler(); @@ -1102,6 +1141,10 @@ double RuntimeController::getBaselineTime() { return baseline_time; } +Slowdowns *RuntimeController::getSlowdowns() { + return slowdowns; +} + // Functions to be inserted with initializeTensorRT and clearTensorRT void llvm_hpvm_initializeRuntimeController(const char *ConfigFile, const char *QRangeFile) { RC = new RuntimeController(); @@ -1209,20 +1252,12 @@ void llvm_hpvm_invokeRtControl_BASE(void* result, const char* str, int start, in RC->resume_profiler(); uint32_t* labels_cached = hpvm_rt_readLabelsBatch_cached(str, start, end); - hpvm_rt_computeAccuracy3(labels_cached, result); // Read stats for iteration that was just completed double current_iteration_time = RC->getCurrentIterationComputeTime(); double current_iteration_energy = RC->getCurrentIterationComputeEnergy(); -// RC->findNextConfiguration(); -// // Still use findNext configuration, to update the configurationIdx, -// // to point to next location -// enum SEARCH_KIND k = ACCURACY_LOSS; -// float goalVal = RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->accuracyLoss; -// RC->findTargetConfiguration(goalVal, k); - RC->pause_profiler(); std::pair<double, double> pinfo = RC->get_time_energy(); RC->reset_profiler(); @@ -1238,16 +1273,14 @@ void llvm_hpvm_invokeRtControl_BASE(void* result, const char* str, int start, in void llvm_hpvm_invokeRtControl_ITERATE(void* result, const char* str, int start, int end) { - RC->resume_profiler(); - uint32_t* labels_cached = hpvm_rt_readLabelsBatch_cached(str, start, end); - hpvm_rt_computeAccuracy3(labels_cached, result); // Read stats for iteration that was just completed double current_iteration_time = RC->getCurrentIterationComputeTime(); double current_iteration_energy = RC->getCurrentIterationComputeEnergy(); + RC->resume_profiler(); RC->findNextConfiguration(); // Still use findNext configuration, to update the configurationIdx, // to point to next location @@ -1288,8 +1321,47 @@ void llvm_hpvm_invokeRtControl_ADJUST(void* result, const char* str, int start, RC->addToCurrentIterationControlTime(pinfo.first); RC->addToCurrentIterationControlEnergy(pinfo.second); - INFO("current iteration time = %f, current iteration energy = %f\n\n", + INFO("current iteration time = %f, current iteration energy = %f\n", + current_iteration_time, current_iteration_energy); + INFO("target speedup = %lf\n\n", target_speedup); + + // Note the end of iteration + RC->end_iteration(); +} + +void llvm_hpvm_invokeRtControl_SLOWDOWN(void* result, const char* str, int start, int end) { + + uint32_t* labels_cached = hpvm_rt_readLabelsBatch_cached(str, start, end); + hpvm_rt_computeAccuracy3(labels_cached, result); + + // Read stats for iteration that was just completed + double current_iteration_time = RC->getCurrentIterationComputeTime(); + double current_iteration_energy = RC->getCurrentIterationComputeEnergy(); + + std::string prev_conf_name = + RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name; + + RC->resume_profiler(); + float slowdown = RC->getSlowdowns()->getNextSlowdown(); + RC->findTargetConfiguration(slowdown, SPEEDUP); + RC->pause_profiler(); + + std::pair<double, double> pinfo = RC->get_time_energy(); + RC->reset_profiler(); + RC->addToCurrentIterationControlTime(pinfo.first); + 
RC->addToCurrentIterationControlEnergy(pinfo.second); + + std::string next_conf_name = + RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name; + float next_conf_speedup = + RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->speedup; + + INFO("current iteration time = %f, current iteration energy = %f\n", current_iteration_time, current_iteration_energy); + INFO("slowdown (target speedup) = %f\n", slowdown); + INFO("Previous configuration: %s\n", prev_conf_name.c_str()); + INFO("Swapping to next configuration: %s with speedup %f\n\n", + next_conf_name.c_str(), next_conf_speedup); // Note the end of iteration RC->end_iteration(); @@ -1320,12 +1392,6 @@ void llvm_hpvm_invokeRtControl_RAND(void* result, const char* str, int start, in RC->end_iteration(); } -//void llvm_hpvm_invokeRtControl(void* result, const char* str, int start, int end) { -// llvm_hpvm_invokeRtControl_BASE(result, str, start, end); -// llvm_hpvm_invokeRtControl_ITERATE(result, str, start, end); -// llvm_hpvm_invokeRtControl_ADJUST(result, str, start, end); -// llvm_hpvm_invokeRtControl_RAND(result, str, start, end); -//} #endif diff --git a/llvm/projects/soc_simulator/alexnet2_cifar10/alexnet2_layers.txt b/llvm/projects/soc_simulator/alexnet2_cifar10/alexnet2_layers.txt index afd1ee1e405c06e75bb35edbee3a4f5332b89b35..98dfa6fa380a34ee7ff5ce0615656deab585ac5b 100644 --- a/llvm/projects/soc_simulator/alexnet2_cifar10/alexnet2_layers.txt +++ b/llvm/projects/soc_simulator/alexnet2_cifar10/alexnet2_layers.txt @@ -1,7 +1,7 @@ -Conv1,5000,3,32,32,32,3,3,3,1,1 -Conv2,5000,32,32,32,32,32,3,3,1,1 -Conv3,5000,32,16,16,64,32,3,3,1,1 -Conv4,5000,64,16,16,64,64,3,3,1,1 -Conv5,5000,64,8,8,128,64,3,3,1,1 -Conv6,5000,128,8,8,128,128,3,3,1,1 -FC1,5000,2048,2048,10 +Conv1,2000,3,32,32,32,3,3,3,1,1 +Conv2,2000,32,32,32,32,32,3,3,1,1 +Conv3,2000,32,16,16,64,32,3,3,1,1 +Conv4,2000,64,16,16,64,64,3,3,1,1 +Conv5,2000,64,8,8,128,64,3,3,1,1 +Conv6,2000,128,8,8,128,128,3,3,1,1 +FC1,2000,2048,2048,10 diff --git a/llvm/projects/soc_simulator/alexnet_cifar10/alexnet_layers.txt b/llvm/projects/soc_simulator/alexnet_cifar10/alexnet_layers.txt index 30f10ff16d6c5efcefdef0d9b914191ec0236a43..bc8c3f5668a2fdb5eb8a568f34b334fe02016954 100644 --- a/llvm/projects/soc_simulator/alexnet_cifar10/alexnet_layers.txt +++ b/llvm/projects/soc_simulator/alexnet_cifar10/alexnet_layers.txt @@ -1,6 +1,6 @@ -Conv1,5000,3,32,32,64,3,11,11,1,1 -Conv2,5000,64,16,16,192,64,5,5,1,1 -Conv3,5000,192,8,8,384,192,3,3,1,1 -Conv4,5000,384,8,8,256,384,3,3,1,1 -Conv5,5000,256,8,8,256,256,3,3,1,1 -FC1,5000,4096,4096,10 +Conv1,2000,3,32,32,64,3,11,11,1,1 +Conv2,2000,64,16,16,192,64,5,5,1,1 +Conv3,2000,192,8,8,384,192,3,3,1,1 +Conv4,2000,384,8,8,256,384,3,3,1,1 +Conv5,2000,256,8,8,256,256,3,3,1,1 +FC1,2000,4096,4096,10 diff --git a/llvm/projects/soc_simulator/mobilenet_cifar10/mobilenet_layers.txt b/llvm/projects/soc_simulator/mobilenet_cifar10/mobilenet_layers.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec202b5be38d401551b82746655d45847567307c --- /dev/null +++ b/llvm/projects/soc_simulator/mobilenet_cifar10/mobilenet_layers.txt @@ -0,0 +1,83 @@ +Conv1,2000,3,32,32,32,3,3,3,1,1 +NML1 +NML2 +NML3 +NML4 +NML5 +Conv3,2000,32,32,32,64,32,1,1,1,1 +NML6 +NML7 +NML8 +NML9 +NML10 +Conv5,2000,64,16,16,128,64,1,1,1,1 +NML11 +NML12 +NML13 +NML14 +NML15 +Conv7,2000,128,16,16,128,128,1,1,1,1 +NML16 +NML17 +NML18 +NML19 +NML20 +Conv9,2000,128,8,8,256,128,1,1,1,1 +NML21 +NML22 +NML23 +NML24 +NML25 +Conv11,2000,256,8,8,256,256,1,1,1,1 +NML26 +NML27 +NML28 
+NML29 +NML30 +Conv13,2000,256,4,4,512,256,1,1,1,1 +NML31 +NML32 +NML33 +NML34 +NML35 +Conv15,2000,512,4,4,512,512,1,1,1,1 +NML36 +NML37 +NML38 +NML39 +NML40 +Conv17,2000,512,4,4,512,512,1,1,1,1 +NML41 +NML42 +NML43 +NML44 +NML45 +Conv19,2000,512,4,4,512,512,1,1,1,1 +NML46 +NML47 +NML48 +NML49 +NML50 +Conv21,2000,512,4,4,512,512,1,1,1,1 +NML51 +NML52 +NML53 +NML54 +NML55 +Conv23,2000,512,4,4,512,512,1,1,1,1 +NML56 +NML57 +NML58 +NML59 +NML60 +Conv25,2000,512,2,2,1024,512,1,1,1,1 +NML61 +NML62 +NML63 +NML64 +NML65 +Conv27,2000,1024,2,2,1024,1024,1,1,1,1 +NML66 +NML67 +NML68 +FC1,2000,1024,1024,10 diff --git a/llvm/projects/soc_simulator/mobilenet_shallow/mobilenet_shallow_layers.txt b/llvm/projects/soc_simulator/mobilenet_shallow/mobilenet_shallow_layers.txt index 4ab1093cab28b2f3dfa284a4669c6a2885ff667d..ba85aa142542d34722b19b9a16314100ecdd62da 100644 --- a/llvm/projects/soc_simulator/mobilenet_shallow/mobilenet_shallow_layers.txt +++ b/llvm/projects/soc_simulator/mobilenet_shallow/mobilenet_shallow_layers.txt @@ -1,41 +1,41 @@ -Conv1,4500,3,32,32,32,3,3,3,1,1 +Conv1,2000,3,32,32,32,3,3,3,1,1 NML1 NML2 NML3 NML4 NML5 -Conv3,4500,32,32,32,64,32,1,1,1,1 +Conv3,2000,32,32,32,64,32,1,1,1,1 NML6 NML7 NML8 NML9 NML10 -Conv5,4500,64,16,16,128,64,1,1,1,1 +Conv5,2000,64,16,16,128,64,1,1,1,1 NML11 NML12 NML13 NML14 NML15 -Conv7,4500,128,16,16,128,128,1,1,1,1 +Conv7,2000,128,16,16,128,128,1,1,1,1 NML16 NML17 NML18 NML19 NML20 -Conv9,4500,128,8,8,256,128,1,1,1,1 +Conv9,2000,128,8,8,256,128,1,1,1,1 NML21 NML22 NML23 NML24 NML25 -Conv11,4500,256,8,8,256,256,1,1,1,1 +Conv11,2000,256,8,8,256,256,1,1,1,1 NML26 NML27 NML28 NML29 NML30 -Conv13,4500,256,4,4,512,256,1,1,1,1 +Conv13,2000,256,4,4,512,256,1,1,1,1 NML31 NML32 NML33 -FC1,4500,1024,1024,10 +FC1,2000,1024,1024,10 diff --git a/llvm/projects/soc_simulator/resnet18_cifar10/resnet18_layers.txt b/llvm/projects/soc_simulator/resnet18_cifar10/resnet18_layers.txt index 28a2079ddd696dcb78ada78c0d18509c92b1145c..6837e87207b24eec8c1913275aa742824a67f74f 100644 --- a/llvm/projects/soc_simulator/resnet18_cifar10/resnet18_layers.txt +++ b/llvm/projects/soc_simulator/resnet18_cifar10/resnet18_layers.txt @@ -1,41 +1,41 @@ -Conv1,5000,3,32,32,16,3,3,3,1,1 -Conv2,5000,16,32,32,16,16,3,3,1,1 -Conv3,5000,16,32,32,16,16,3,3,1,1 +Conv1,2000,3,32,32,16,3,3,3,1,1 +Conv2,2000,16,32,32,16,16,3,3,1,1 +Conv3,2000,16,32,32,16,16,3,3,1,1 NML1 NML2 -Conv4,5000,16,32,32,16,16,3,3,1,1 -Conv5,5000,16,32,32,16,16,3,3,1,1 +Conv4,2000,16,32,32,16,16,3,3,1,1 +Conv5,2000,16,32,32,16,16,3,3,1,1 NML3 NML4 -Conv6,5000,16,32,32,16,16,3,3,1,1 -Conv7,5000,16,32,32,16,16,3,3,1,1 +Conv6,2000,16,32,32,16,16,3,3,1,1 +Conv7,2000,16,32,32,16,16,3,3,1,1 NML5 NML6 -Conv8,5000,16,32,32,32,16,3,3,2,2 -Conv9,5000,32,16,16,32,32,3,3,1,1 -Conv10,5000,16,32,32,32,16,1,1,2,2 +Conv8,2000,16,32,32,32,16,3,3,2,2 +Conv9,2000,32,16,16,32,32,3,3,1,1 +Conv10,2000,16,32,32,32,16,1,1,2,2 NML7 NML8 -Conv11,5000,32,16,16,32,32,3,3,1,1 -Conv12,5000,32,16,16,32,32,3,3,1,1 +Conv11,2000,32,16,16,32,32,3,3,1,1 +Conv12,2000,32,16,16,32,32,3,3,1,1 NML9 NML10 -Conv13,5000,32,16,16,32,32,3,3,1,1 -Conv14,5000,32,16,16,32,32,3,3,1,1 +Conv13,2000,32,16,16,32,32,3,3,1,1 +Conv14,2000,32,16,16,32,32,3,3,1,1 NML11 NML12 -Conv15,5000,32,16,16,64,32,3,3,2,2 -Conv16,5000,64,8,8,64,64,3,3,1,1 -Conv17,5000,32,16,16,64,32,1,1,2,2 +Conv15,2000,32,16,16,64,32,3,3,2,2 +Conv16,2000,64,8,8,64,64,3,3,1,1 +Conv17,2000,32,16,16,64,32,1,1,2,2 NML13 NML14 -Conv18,5000,64,8,8,64,64,3,3,1,1 -Conv19,5000,64,8,8,64,64,3,3,1,1 
+Conv18,2000,64,8,8,64,64,3,3,1,1 +Conv19,2000,64,8,8,64,64,3,3,1,1 NML15 NML16 -Conv20,5000,64,8,8,64,64,3,3,1,1 -Conv21,5000,64,8,8,64,64,3,3,1,1 +Conv20,2000,64,8,8,64,64,3,3,1,1 +Conv21,2000,64,8,8,64,64,3,3,1,1 NML17 NML18 NML19 -FC1,5000,64,64,10 +FC1,2000,64,64,10 diff --git a/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py b/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py new file mode 100644 index 0000000000000000000000000000000000000000..f53573f7cde9420400194827d55d84d69e2ace5b --- /dev/null +++ b/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py @@ -0,0 +1,491 @@ +from collections import defaultdict +import os +import subprocess +import sys + +class Driver: + fp16_swing = 8 + + class PrecisionTypes: + FP16 = 0 + FP32 = 1 + PROMISE = 2 + + class ApproxTypes: + PERF = 3 + SAMP = 4 + + results_time_key = "Time" + results_energy_key = "Energy" + + + def __init__(self, layer_filename, table_filename, config_filename, results_filename): + self.__layer_filename = layer_filename + self.__table_filename = table_filename + self.__config_filename = config_filename + self.__results_filename = results_filename + + # NOTE: Use an OrderedDict if we want to search by operation name + # Using a list bc we care about the order the data is read in + # since it corresponds to the data in the configuration file + self.__tensor_layers = [] + + # [layer_name][operation_name][cols] + # Operation names need to be stored in order of insertion + self.__tensor_table = defaultdict(lambda: list(defaultdict(str))) + + self.__conf_results = [] # indexed + #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])} + + + @staticmethod + def is_conv(operation_name): + return operation_name.startswith("Conv") + + + @staticmethod + def is_nml(operation_name): + return operation_name.startswith("NML") + + + @staticmethod + def is_fc(operation_name): + return operation_name.startswith("FC") + + + # FOR DEBUGGING ONLY + def __get_str(self, appr): + if appr == Driver.PrecisionTypes.FP16: + return "FP16" + elif appr == Driver.PrecisionTypes.FP32: + return "FP32" + elif appr == Driver.PrecisionTypes.PROMISE: + return "PROMISE" + elif appr == Driver.ApproxTypes.PERF: + return "PERF" + elif appr == Driver.ApproxTypes.SAMP: + return "SAMP" + + + def driver(self): + self.__parse_tensor_layer_file() + self.__parse_tensor_table() + self.__run_simulations() + self.__write_output() + + + def __parse_tensor_layer_file(self): + if not os.path.isfile(self.__layer_filename): + print("ERROR: %s was not found." 
% self.__layer_filename) + exit(1) + layer_file = open(self.__layer_filename, "r") + for line in layer_file: + layer_data = line.strip().split(',') + layer_name = layer_data[0] + + tensor_layer = defaultdict(str) + tensor_layer["Name"] = layer_name + + if Driver.is_conv(layer_name): + tensor_layer["N"] = float(layer_data[1]) + tensor_layer["Cin"] = float(layer_data[2]) + tensor_layer["H"] = float(layer_data[3]) + tensor_layer["W"] = float(layer_data[4]) + tensor_layer["Cout"] = float(layer_data[5]) + tensor_layer["Kh"] = float(layer_data[7]) + tensor_layer["Kw"] = float(layer_data[8]) + tensor_layer["Sh"] = float(layer_data[9]) + tensor_layer["Sw"] = float(layer_data[10]) + + elif Driver.is_fc(layer_name): + tensor_layer["RA"] = float(layer_data[1]) + tensor_layer["CA"] = float(layer_data[2]) + tensor_layer["RB"] = float(layer_data[3]) + tensor_layer["CB"] = float(layer_data[4]) + + elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs? + print("ERROR: Invalid layer name %s" % layer_name) + exit(1) + + self.__tensor_layers.append(tensor_layer) + layer_file.close() + + + def __parse_tensor_table(self): + if not os.path.isfile(self.__table_filename): + print("ERROR: %s was not found." % self.__table_filename) + exit(1) + table_file = open(self.__table_filename, "r") + line = table_file.readline().strip() + + while line: + # Line here MUST be a header or there's a bug + # Get the description of the layer + assert(line.startswith("**")) + header_contents = line.split(' ')[1:] + layer_name = header_contents[0] + num_ops = int(header_contents[1]) + col_names = header_contents[2:] + + layer_operations = [] + + # Go through all operations in the layer + for op_count in range(num_ops): + operation_data = defaultdict(str) + + line = table_file.readline().strip() + op_data = line.split(' ') + op_name = op_data[0] + operation_data["Name"] = op_name + + # Number of data items (#s) needs to match up with the # of cols + assert(len(op_data) - 1 == len(col_names)) + + # Go through all data items (each col element) per operation + for i in range(len(col_names)): + operation_data[col_names[i]] = float(op_data[i + 1]) + + layer_operations.append(operation_data) + + self.__tensor_table[layer_name] = layer_operations + line = table_file.readline().strip() + table_file.close() + + + @staticmethod + def is_promise(layer_hardware): + return layer_hardware == "promise" + + @staticmethod + def is_gpu(layer_hardware): + return layer_hardware == "gpu" + + def __run_simulations(self): + config_file = open(self.__config_filename, "r") + line = config_file.readline().strip() + + while line: + assert(line == "+++++") + print("CONFIGURATION") + + curr_conf_results = [] + + prev_layer = Driver.PrecisionTypes.FP32 + curr_layer = None + + line = config_file.readline().strip() + first_line = line + conf_name = line.split(' ')[0] + print("CONF NAME: %s" % conf_name) + assert(conf_name.startswith("conf")) + line = config_file.readline().strip() + + while line != "-----": + layer_as_lst = line.split(' ') + layer_results = [] + # Skip softmax + if line.find("softmax") != -1: + layer_results.append((0, 0, ' '.join(layer_as_lst[2:]))) + curr_conf_results.append((layer_as_lst[1], layer_results)) + line = config_file.readline().strip() + continue + + layer_ind = int(layer_as_lst[0]) - 1 + layer_table_data = self.__tensor_layers[layer_ind] + layer_name = layer_table_data["Name"] + + if Driver.is_promise(layer_as_lst[1]): + print("Running layer %s on PROMISE" % layer_name) + curr_layer = Driver.PrecisionTypes.PROMISE 
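[Editor's note] For reference, a hypothetical configuration block in the shape `__run_simulations` expects, reconstructed from the parsing code above (`+++++` sentinel, a header line whose first token starts with `conf`, one line per layer, `-----` terminator). Every numeric value below is made up for illustration.

```python
# Editor's illustration of the input parsed by __run_simulations:
# promise layers carry "<param> <number>" pairs; gpu layers carry
# "<op> <precision-or-approx> <knob>" triplets; softmax lines are skipped.
sample_conf_block = """\
+++++
conf1 2.5 2.1 84.2 0.5
1 gpu conv fp16 1 add fp16 1 relu fp16 1
2 gpu conv perf 2 add fp16 1
3 promise swing_level 4
4 gpu softmax fp32 1
-----
"""
```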
+ + total_time = 0 + total_energy = 0 + + # To support multiple sets of <param> <number> in the future + for i in range(2, len(layer_as_lst), 2): + param_name = layer_as_lst[i] # Use when there's more than 1 type of param + param_val = int(layer_as_lst[i + 1]) + time, energy = self.__run_promise_simulation(param_val, layer_table_data) + total_time += time + total_energy += energy + layer_results.append((total_time, total_energy, ' '.join(layer_as_lst[2:]))) + + elif Driver.is_gpu(layer_as_lst[1]): + print("Running layer %s on the GPU" % layer_name) + + tensor_count = 0 + + # 3 elements per tensor operation + for i in range(2, len(layer_as_lst), 3): + op_type = layer_as_lst[i] + precision_type = layer_as_lst[i + 1] + op_number = layer_as_lst[i + 2] + #print(' '.join(layer_as_lst[i : i + 3])) + + approx_type = None + if line.find("fp16") != -1: + curr_layer = Driver.PrecisionTypes.FP16 + elif line.find("fp32") != -1: + curr_layer = Driver.PrecisionTypes.FP32 + if precision_type == "perf" or precision_type == "samp": # Handle approx type + if precision_type == "perf": + approx_type = Driver.ApproxTypes.PERF + elif precision_type == "samp": + approx_type = Driver.ApproxTypes.SAMP + curr_layer = Driver.PrecisionTypes.FP16 + print(curr_layer, prev_layer) + quant_time, quant_energy = self.__quantize(precision_type, op_number, curr_layer, prev_layer, tensor_count, layer_table_data) + if quant_time != 0: + assert i == 2 #and layer_ind == 0 + conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \ + tensor_count, approx_type, op_number) + layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3]))) + prev_layer = curr_layer + tensor_count += 1 + + line = config_file.readline().strip() + prev_layer = curr_layer + curr_conf_results.append((layer_as_lst[1], layer_results)) + + if not self.__conf_results: # we're appending the baseline + # need to find the fp16 baseline + self.fp16_baseline = [] + + prev_layer = Driver.PrecisionTypes.FP32 + curr_layer = None + + has_quantized = False + for layer_ind, (hardware, layer) in enumerate(curr_conf_results): + if len(layer) == 1 and layer[0][2].find("softmax") != -1: continue + fp16_layer = [] + #print(layer_ind, hardware, layer) + layer_table_data = self.__tensor_layers[layer_ind] + layer_name = layer_table_data["Name"] + + for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer): + curr_layer = Driver.PrecisionTypes.FP16 # always + + quant_time, quant_energy = self.__quantize("fp16", "1", curr_layer, prev_layer, tensor_ind, layer_table_data) + if quant_time != 0: + assert not has_quantized + has_quantized = True + tensor_info = self.__tensor_table[layer_name][tensor_ind] + fp16_time = tensor_info["fp16_time"] + quant_time + fp16_energy = tensor_info["fp16_energy"] + quant_energy + fp16_layer.append((fp16_time, fp16_energy, tensor_op.replace("fp32", "fp16"))) + prev_layer = curr_layer + + prev_layer = curr_layer + self.fp16_baseline.append((hardware, fp16_layer)) + self.__conf_results.append( (first_line, curr_conf_results) ) + line = config_file.readline().strip() + config_file.close() + + + def __quantize(self, precision_type, op_number, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \ + or prev_layer == Driver.PrecisionTypes.PROMISE: + return 0.0, 0.0 + layer_name = layer_data["Name"] + + # NOTE: Ignoring logic where curr == promise or prev == promise bc + # smartDMA is always true so we'd 
return near the beginning of the method + + # Get h2f/f2h data using the first tensor operation in the layer + # (which is why order matters in the tensor table) + tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind] + time_key = None + energy_key = None + + if op_number == "1": + lookup_key = "_" #lookup_key = precision_type + else: + lookup_key = "_" + precision_type + str(op_number) + "_" + + print(curr_layer) + if curr_layer == Driver.PrecisionTypes.FP32: + time_key = "h2f%stime" % lookup_key + energy_key = "h2f%senergy" % lookup_key + elif curr_layer == Driver.PrecisionTypes.FP16: + time_key = "f2h%stime" % lookup_key + energy_key = "f2h%senergy" % lookup_key + time = tensor_op_row[time_key] + energy = tensor_op_row[energy_key] + print(time_key, energy_key) + print("Quantization: (%f, %f)" % (time, energy)) + return (time, energy) + + + def __run_promise_simulation(self, swing, layer_data): + layer_name = layer_data["Name"] + patch_factor = 1 + + if Driver.is_conv(layer_name): + rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ + / (layer_data["Sh"] * layer_data["Sw"]) + cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] + rows_b = cols_a + cols_b = layer_data["Cout"] + patch_factor = layer_data["Kh"] * layer_data["Kw"] + elif Driver.is_fc(layer_name): + rows_a = layer_data["RA"] + cols_a = layer_data["CA"] + rows_b = cols_ + cols_b = layer_data["CB"] + else: + print("PROMISE can't run whatever this layer is.") + exit(1) + # Run promise simulator + # TODO need to print time and energy in the ptm runner so we can pipe it + output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \ + str(cols_b), str(patch_factor), str(swing)], \ + stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] + total_time_energy = output.strip().split(',') + + assert(len(total_time_energy) == 2) + return float(total_time_energy[0]), float(total_time_energy[1]) + + + def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \ + approx_type = None, knob_number = None): + tensor_info = self.__tensor_table[layer_name][tensor_ind] + #print(tensor_info) + #print(layer_name) + #print(tensor_ind) + time_key = None + energy_key = None + + if approx_type == Driver.ApproxTypes.PERF or approx_type == Driver.ApproxTypes.SAMP: # fp16_perf2_energy + approx_type_str = None + if approx_type == Driver.ApproxTypes.PERF: + approx_type_str = "perf" + elif approx_type == Driver.ApproxTypes.SAMP: + approx_type_str = "samp" + + if curr_layer == Driver.PrecisionTypes.FP32: + time_key = "fp32_%s%s_time" % (approx_type_str, knob_number) + energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number) + + elif curr_layer == Driver.PrecisionTypes.FP16: + time_key = "fp16_%s%s_time" % (approx_type_str, knob_number) + energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number) + + else: # None for now + if curr_layer == Driver.PrecisionTypes.FP32: + time_key = "fp32_time" + energy_key = "fp32_energy" + + elif curr_layer == Driver.PrecisionTypes.FP16: + time_key = "fp16_time" + energy_key = "fp16_energy" + #print(time_key, energy_key) + conversion_time = tensor_info[time_key] + conversion_energy = tensor_info[energy_key] + #print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy)) + return conversion_time, conversion_energy + + + def __write_output(self): + config_file = open(self.__config_filename, "r") + results_file = open(self.__results_filename, "w") + + def write_conf_to_file(conf_name, final_conf, time_speedup, energy_speedup): + # 
conf = [layer value if promise], [tensor vals if gpu]] + conf_str = ["+++++"] + + # process the first line + first_line, layers = final_conf + first_line_lst = first_line.split(' ') + assert first_line_lst[0] == conf_name + + new_header = [conf_name] + new_header.append(repr(time_speedup)) + new_header.append(repr(energy_speedup)) + new_header.append(repr(abs(float(first_line_lst[-2])))) + new_header.append(repr(abs(float(first_line_lst[-1])))) + conf_str.append(' '.join(new_header)) + + for ind, (hardware, layer) in enumerate(layers): + layer_lst = [str(ind + 1)] + layer_lst.append(hardware) + for op_time, op_energy, tensor_op in layer: + layer_lst.append(tensor_op) + conf_str.append(' '.join(layer_lst)) + conf_str.append("-----\n") + results_file.write('\n'.join(conf_str)) + + baseline_conf = None + baseline_total_time = baseline_total_energy = 0 + + def get_baseline_times_energies(conf): + curr_time = curr_energy = 0 + for hardware, layer in conf[1]: + for op_time, op_energy, tensor_op in layer: + curr_time += op_time + curr_energy += op_energy + return curr_time, curr_energy + + def get_final_times_energies_conf(curr_conf, curr_conf_name): + final_time = final_energy = 0 + + final_conf = [] # List (conf) of lists (layers) of tuples (operation data) + + #for hardware, layer in self.fp16_baseline: + #print(hardware, layer) + for layer_ind, (hardware, layer) in enumerate(curr_conf[1]): + final_conf_layer = [] + + for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer): + if tensor_op.find("softmax") != -1: + final_conf_layer.append((None, None, tensor_op)) + continue + # layer name, operation name, val name + baseline_time = self.fp16_baseline[layer_ind][1][tensor_ind][0] + baseline_energy = self.fp16_baseline[layer_ind][1][tensor_ind][1] + baseline_op = self.fp16_baseline[layer_ind][1][tensor_ind][2] + #print(baseline_time, baseline_energy, baseline_op) + #print(op_time, tensor_op) + final_tensor_op = tensor_op + #print(op_time > baseline_time) + if op_time > baseline_time: + #print("**************** BIGGER ******************") + #print(curr_conf_name) + #print(baseline_time, baseline_energy, baseline_op, layer_ind) + #print(op_time, tensor_op, layer_ind) + final_time += baseline_time + final_energy += baseline_energy + final_tensor_op = baseline_op + else: + final_time += op_time + final_energy += op_energy + final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing + final_conf.append((hardware, final_conf_layer)) + #print("\n") + return final_time, final_energy, (curr_conf[0], final_conf) + + conf_index = 0 + print("RESULTS") + for line in config_file: + if line.startswith("conf"): + orig_line_lst = line.split(' ') + conf_name = orig_line_lst[0] + + if not baseline_conf: + baseline_conf = self.__conf_results[conf_index] #conf_name] + baseline_total_time, baseline_total_energy = get_baseline_times_energies(baseline_conf) + results_file.write("%s\n" % repr(baseline_total_time)) + write_conf_to_file(conf_name, baseline_conf, 1, 1) + else: + curr_conf = self.__conf_results[conf_index] #conf_name] + #final_time, final_energy, = get_baseline_times_energies(curr_conf) + final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf, conf_name) + write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy) + conf_index += 1 + results_file.close() + config_file.close() + +if __name__ == "__main__": + if len(sys.argv) != 5: + print("Usage: python driver.py <layer 
info> <tensor info> <configurations> <results file>") + exit(1) + Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver() diff --git a/llvm/projects/soc_simulator/src/driver_new_config.py b/llvm/projects/soc_simulator/src/driver_new_config_no_fp16_repl.py similarity index 59% rename from llvm/projects/soc_simulator/src/driver_new_config.py rename to llvm/projects/soc_simulator/src/driver_new_config_no_fp16_repl.py index 6230092b41c6c63084373f4ab56ec4797095bdca..d12477fd77533f94ff067e05771459ff4c830bb8 100644 --- a/llvm/projects/soc_simulator/src/driver_new_config.py +++ b/llvm/projects/soc_simulator/src/driver_new_config_no_fp16_repl.py @@ -13,7 +13,8 @@ class Driver: class ApproxTypes: PERF = 3 - + SAMP = 4 + results_time_key = "Time" results_energy_key = "Energy" @@ -33,9 +34,8 @@ class Driver: # Operation names need to be stored in order of insertion self.__tensor_table = defaultdict(lambda: list(defaultdict(str))) - # [Time/Energy][config name] = time/energy - self.__aggregate_results = defaultdict(lambda: defaultdict(float)) - self.__config_count = 0 + self.__conf_results = [] # indexed + #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])} @staticmethod @@ -63,6 +63,8 @@ class Driver: return "PROMISE" elif appr == Driver.ApproxTypes.PERF: return "PERF" + elif appr == Driver.ApproxTypes.SAMP: + return "SAMP" def driver(self): @@ -76,7 +78,6 @@ class Driver: if not os.path.isfile(self.__layer_filename): print("ERROR: %s was not found." % self.__layer_filename) exit(1) - layer_file = open(self.__layer_filename, "r") for line in layer_file: layer_data = line.strip().split(',') @@ -138,7 +139,8 @@ class Driver: operation_data["Name"] = op_name # Number of data items (#s) needs to match up with the # of cols - assert(len(op_data) - 1 == len(col_names)) + assert(len(op_data) - 1 == len(col_names)) + # Go through all data items (each col element) per operation for i in range(len(col_names)): operation_data[col_names[i]] = float(op_data[i + 1]) @@ -165,22 +167,29 @@ class Driver: while line: assert(line == "+++++") print("CONFIGURATION") - + + curr_conf_results = [] + prev_layer = Driver.PrecisionTypes.FP32 curr_layer = None line = config_file.readline().strip() + first_line = line conf_name = line.split(' ')[0] + print("CONF NAME: %s" % conf_name) assert(conf_name.startswith("conf")) line = config_file.readline().strip() while line != "-----": + layer_as_lst = line.split(' ') + layer_results = [] # Skip softmax if line.find("softmax") != -1: + layer_results.append((0, 0, ' '.join(layer_as_lst[2:]))) + curr_conf_results.append((layer_as_lst[1], layer_results)) line = config_file.readline().strip() continue - layer_as_lst = line.split(' ') layer_ind = int(layer_as_lst[0]) - 1 layer_table_data = self.__tensor_layers[layer_ind] layer_name = layer_table_data["Name"] @@ -199,17 +208,11 @@ class Driver: time, energy = self.__run_promise_simulation(param_val, layer_table_data) total_time += time total_energy += energy - - self.__aggregate_results[Driver.results_time_key][conf_name] += total_time - self.__aggregate_results[Driver.results_energy_key][conf_name] += total_energy - print(total_time, total_energy) - print("AGGREGATE RESULTS: ", self.__aggregate_results) + layer_results.append((total_time, total_energy, ' '.join(layer_as_lst[2:]))) elif Driver.is_gpu(layer_as_lst[1]): print("Running layer %s on the GPU" % layer_name) - total_time = 0 - total_energy = 0 tensor_count = 0 # 3 elements per tensor operation @@ -219,45 +222,45 @@ 
class Driver:
                     op_number = layer_as_lst[i + 2]

                     approx_type = None
-
-                    if precision_type == "fp16" or line.find("fp16") != -1:
+                    if line.find("fp16") != -1:
                         curr_layer = Driver.PrecisionTypes.FP16
-                    elif precision_type == "fp32" or line.find("fp32") != -1:
+                    elif line.find("fp32") != -1:
                         curr_layer = Driver.PrecisionTypes.FP32
-                    elif precision_type == "perf": # Handle approx type
-                        approx_type = Driver.ApproxTypes.PERF
+
+                    if precision_type == "perf" or precision_type == "samp": # Handle approx type
+                        if precision_type == "perf":
+                            approx_type = Driver.ApproxTypes.PERF
+                        elif precision_type == "samp":
+                            approx_type = Driver.ApproxTypes.SAMP
                         if line.find("fp16") != -1:
                             curr_layer = Driver.PrecisionTypes.FP16
                         elif line.find("fp32") != -1:
                             curr_layer = Driver.PrecisionTypes.FP32
-
-                    quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, \
+                    quant_time, quant_energy = self.__quantize(op_type, precision_type, op_number, curr_layer, prev_layer, \
                                 tensor_count, layer_table_data)
+                    if quant_time != 0:
+                        assert i == 2
                     conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \
                                 tensor_count, approx_type, op_number)
-
-                    total_time += quant_time + conv_time
-                    total_energy += quant_energy + conv_energy
+                    print(quant_time, quant_energy, conv_time, conv_energy)
+                    layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3])))
                     prev_layer = curr_layer
                     tensor_count += 1

-            print(total_time, total_energy)
-            self.__aggregate_results[Driver.results_time_key][conf_name] += total_time
-            self.__aggregate_results[Driver.results_energy_key][conf_name] += total_energy
-            print(Driver.results_energy_key, conf_name)
-            print("AGGREGATE RESULTS", self.__aggregate_results)
             line = config_file.readline().strip()
             prev_layer = curr_layer
+            curr_conf_results.append((layer_as_lst[1], layer_results))
+        self.__conf_results.append( (first_line, curr_conf_results) )
         line = config_file.readline().strip()
     config_file.close()
-        print("AGGREGATE RESULTS", self.__aggregate_results)

-    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
+
+    def __quantize(self, op_type, precision_type, op_number, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
         if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \
                     or prev_layer == Driver.PrecisionTypes.PROMISE:
             return 0.0, 0.0
-
+        print("IN QUANTIZE")
         layer_name = layer_data["Name"]

         # NOTE: Ignoring logic where curr == promise or prev == promise bc
@@ -266,13 +269,25 @@ class Driver:
         # Get h2f/f2h data using the first tensor operation in the layer
         # (which is why order matters in the tensor table)
         tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]
+        time_key = None
+        energy_key = None
+
+        print(precision_type, op_number)
+        if op_number == "1":
+            lookup_key = "_" #lookup_key = precision_type
+        else:
+            lookup_key = "_" + precision_type + str(op_number) + "_"
+
+        print("QUANT LOOKUP KEY", lookup_key)
         if curr_layer == Driver.PrecisionTypes.FP32:
-            time = tensor_op_row["h2f_time"]
-            energy = tensor_op_row["h2f_energy"]
+            time_key = "h2f%stime" % lookup_key
+            energy_key = "h2f%senergy" % lookup_key
         elif curr_layer == Driver.PrecisionTypes.FP16:
-            time = tensor_op_row["f2h_time"]
-            energy = tensor_op_row["f2h_energy"]
-
+            time_key = "f2h%stime" % lookup_key
+            energy_key = "f2h%senergy" % lookup_key
+        print(time_key, energy_key)
+        time = tensor_op_row[time_key]
+        energy = tensor_op_row[energy_key]
         print("Quantization: (%f, %f)" % (time, energy))
         return (time, energy)
@@ -291,7 +306,7 @@ class Driver:
         elif Driver.is_fc(layer_name):
             rows_a = layer_data["RA"]
             cols_a = layer_data["CA"]
             rows_b = cols_a
             cols_b = layer_data["CB"]
         else:
             print("PROMISE can't run whatever this layer is.")
@@ -304,61 +319,143 @@ class Driver:
         total_time_energy = output.strip().split(',')
         assert(len(total_time_energy) == 2)
-        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
         return float(total_time_energy[0]), float(total_time_energy[1])

     def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \
                 approx_type = None, knob_number = None):
         tensor_info = self.__tensor_table[layer_name][tensor_ind]
+        #print(tensor_info)
+        #print(layer_name)
+        #print(tensor_ind)
+        time_key = None
+        energy_key = None
+
+        if approx_type == Driver.ApproxTypes.PERF or approx_type == Driver.ApproxTypes.SAMP: # fp16_perf2_energy
+            approx_type_str = None
+            if approx_type == Driver.ApproxTypes.PERF:
+                approx_type_str = "perf"
+            elif approx_type == Driver.ApproxTypes.SAMP:
+                approx_type_str = "samp"
-        if approx_type == Driver.ApproxTypes.PERF: # fp16_perf2_energy
             if curr_layer == Driver.PrecisionTypes.FP32:
-                conversion_time = tensor_info["fp32_perf%s_time" % knob_number]
-                conversion_energy = tensor_info["fp32_perf%s_energy" % knob_number]
+                time_key = "fp32_%s%s_time" % (approx_type_str, knob_number)
+                energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number)
             elif curr_layer == Driver.PrecisionTypes.FP16:
-                conversion_time = tensor_info["fp16_perf%s_time" % knob_number]
-                conversion_energy = tensor_info["fp16_perf%s_energy" % knob_number]
+                time_key = "fp16_%s%s_time" % (approx_type_str, knob_number)
+                energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number)

         else: # None for now
             if curr_layer == Driver.PrecisionTypes.FP32:
-                conversion_time = tensor_info["fp32_time"]
-                conversion_energy = tensor_info["fp32_energy"]
+                time_key = "fp32_time"
+                energy_key = "fp32_energy"
             elif curr_layer == Driver.PrecisionTypes.FP16:
-                conversion_time = tensor_info["fp16_time"]
-                conversion_energy = tensor_info["fp16_energy"]
-        print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
+                time_key = "fp16_time"
+                energy_key = "fp16_energy"
+        print(time_key, energy_key)
+        conversion_time = tensor_info[time_key]
+        conversion_energy = tensor_info[energy_key]
+        #print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy))

         return conversion_time, conversion_energy

     def __write_output(self):
-        # Need to write the time/energy pairs to the configuration file
-        results_file = open(self.__results_filename, "w")
-
         # Layout is based off the configuration filename
         config_file = open(self.__config_filename, "r")
-
+        results_file = open(self.__results_filename, "w")
+
+        def write_conf_to_file(conf_name, final_conf, time_speedup, energy_speedup):
+            # conf = [layer value if promise], [tensor vals if gpu]]
+            conf_str = ["+++++"]
+
+            # process the first line
+            first_line, layers = final_conf
+            first_line_lst = first_line.split(' ')
+            assert first_line_lst[0] == conf_name
+
+            new_header = [conf_name]
+            new_header.append(repr(time_speedup))
+            new_header.append(repr(energy_speedup))
+            new_header.append(repr(abs(float(first_line_lst[-2]))))
+            new_header.append(repr(abs(float(first_line_lst[-1]))))
+            conf_str.append(' '.join(new_header))
+
+            for ind, (hardware, layer) in enumerate(layers):
+                print(layer)
+                layer_lst = [str(ind + 1)]
+                layer_lst.append(hardware)
+                print(layer_lst)
+                for op_time, op_energy, tensor_op in layer:
+                    layer_lst.append(tensor_op)
+                conf_str.append(' '.join(layer_lst))
+            conf_str.append("-----\n")
+            results_file.write('\n'.join(conf_str))
+
+        baseline_conf = None
+        baseline_total_time = baseline_total_energy = 0
+
+        def get_baseline_times_energies(conf):
+            curr_time = curr_energy = 0
+            print("RESULTS: ", conf[1])
+            for hardware, layer in conf[1]:
+                for op_time, op_energy, tensor_op in layer:
+                    curr_time += op_time
+                    curr_energy += op_energy
+            return curr_time, curr_energy
+
+        def get_final_times_energies_conf(curr_conf):
+            final_time = final_energy = 0
+
+            final_conf = [] # List (conf) of lists (layers) of tuples (operation data)
+
+            for layer_ind, (hardware, layer) in enumerate(curr_conf[1]):
+                final_conf_layer = []
+
+                for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
+                    baseline_time, baseline_energy, baseline_op = baseline_conf[1][layer_ind][tensor_ind]
+                    final_tensor_op = tensor_op
+                    if op_time > baseline_time:
+                        print("**************** BIGGER ******************")
+                        final_time += baseline_time
+                        final_energy += baseline_energy
+                        final_tensor_op = baseline_op
+                    else:
+                        final_time += op_time
+                        final_energy += op_energy
+                    '''
+                    # Ignoring bigger energies for now
+                    if op_energy > baseline_energy:
+                        final_time += baseline_time
+                        final_energy += baseline_energy
+                        final_tensor_op = baseline_op
+                    else:
+                        final_time += op_time
+                        final_energy += op_energy
+                    '''
+                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
+                final_conf.append(final_conf_layer)
+            return final_time, final_energy, (curr_conf[0], final_conf)
+
+        conf_index = 0
+        print("RESULTS")
         for line in config_file:
-            if line.startswith("conf"): # Write in the time and energy
+            if line.startswith("conf"):
                 orig_line_lst = line.split(' ')
-                conf_header = []
                 conf_name = orig_line_lst[0]
-
-                conf_header.append(conf_name)
-                conf_header.append(repr(self.__aggregate_results[Driver.results_time_key][conf_name]))
-                conf_header.append(repr(self.__aggregate_results[Driver.results_energy_key][conf_name]))
-
-                # TODO Accuracy/accuracy loss
-                conf_header.append(orig_line_lst[-2])
-                conf_header.append(orig_line_lst[-1])
-
-                results_file.write("%s" % ' '.join(conf_header))
-            else:
-                results_file.write("%s" % line) # Copy the line
-        config_file.close()
-        results_file.close()
+                if not baseline_conf:
+                    baseline_conf = self.__conf_results[conf_index] #conf_name]
+                    baseline_total_time, baseline_total_energy = get_baseline_times_energies(baseline_conf)
+                    results_file.write("%s\n" % repr(baseline_total_time))
+                    write_conf_to_file(conf_name, baseline_conf, 1, 1)
+                else:
+                    curr_conf = self.__conf_results[conf_index] #conf_name]
+                    final_time, final_energy = get_baseline_times_energies(curr_conf)
+                    write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy)
+                conf_index += 1
+        results_file.close()
+        config_file.close()

 if __name__ == "__main__":
     if len(sys.argv) != 5:
diff --git a/llvm/projects/soc_simulator/src/table_generator.py b/llvm/projects/soc_simulator/src/table_generator.py
index e3b94082f5be7b83a1598625afd5ef05a0472506..528b8e0ef5677cec9ccdba37abfde696544029cc 100644
--- a/llvm/projects/soc_simulator/src/table_generator.py
+++ b/llvm/projects/soc_simulator/src/table_generator.py
@@ -29,23 +29,23 @@ class TableGenerator:
     '''
     precision_conversions = frozenset(["h2f", "f2h"])

-    def __init__(self, dir_path, iters, profiler_binary_name):
+    def __init__(self, network_name, dir_path, soc_ops_file, iters, profiler_binary_name):
         '''
         Args:
             dir_path: Path of directory containing network binaries
             iters: Number of iterations to run each binary for
             profiler_binary_name: Name of offline profiler binary to run
         '''
+        self.__network_name = network_name
         self.__dir_path = dir_path

         # Name of the actual directory
-        self.__network_name = os.path.split(dir_path)[-1]
-
+        self.__soc_ops_filename = soc_ops_file
         self.__iters = iters
         self.__profiler_binary_name = profiler_binary_name

         # Path to results directory
-        self.__results_dir_path = "%s_results" % self.__dir_path
+        self.__results_dir_path = "%s_results" % self.__network_name

         # Outputted table file
         self.__table_filename = "%s_tensors.txt" % self.__network_name
@@ -65,7 +65,7 @@ class TableGenerator:
         3. Writes the internal table to <network_name>_tensors.txt file and uses the
         <network_name>_ops.txt file as a guideline in terms of row order
         '''
-        self.__run_inputted_binaries()
+        #self.__run_inputted_binaries()
         self.__build_internal_table()
         self.__output_table_to_file()
@@ -96,7 +96,7 @@
             if not self.__should_execute_file(binary_path):
                 continue
-
+            output_file = os.path.join(self.__results_dir_path, binary_name + ".txt")
             # No stdout/stderr piping needed for now
             subprocess.Popen([profiler_binary_name, binary_path, str(self.__iters), \
@@ -117,11 +117,9 @@
         approx_type = self.__get_approximation_type(results_file_name)
         results_file = open(os.path.join(self.__results_dir_path, results_file_name), "r")
-
         for line in results_file:
             line = line.strip()
             op_name, total_time, total_energy = self.__parse_tensor_operation_line(line)
-
             # If the current operation is f2h or h2f
             if any(op_name.endswith(prec_conv) for prec_conv in TableGenerator.precision_conversions):
                 # Get the original operation name (without the f2h/h2f) and the conversion type
@@ -132,8 +130,9 @@
                     exit(1)

                 # Store f2h and h2f as columns in the row belonging to the original operation
-                self.__table[orig_op_name][conversion_type][TableGenerator.__time_col_name] = total_time
-                self.__table[orig_op_name][conversion_type][TableGenerator.__energy_col_name] = total_energy
+                approx_type_no_fp_prefix = approx_type[5 : ]
+                self.__table[orig_op_name][conversion_type + "_" + approx_type_no_fp_prefix][TableGenerator.__time_col_name] = total_time
+                self.__table[orig_op_name][conversion_type + "_" + approx_type_no_fp_prefix][TableGenerator.__energy_col_name] = total_energy

             # Create a new row in the dictionary
             else:
@@ -152,10 +151,7 @@
         time and the energy
         '''
         table_file_path = os.path.join(self.__results_dir_path, self.__table_filename)
-        soc_operations_file_name = os.path.join("/", "home", "nvidia", "soc_simulator", \
-                    "%s_cifar10" % self.__network_name, "%s_ops.txt" % self.__network_name)
-
-        soc_operations_file = open(soc_operations_file_name, "r")
+        soc_operations_file = open(self.__soc_ops_filename, "r")
         table_file = open(table_file_path, "w")

         curr_line = soc_operations_file.readline().strip()
@@ -182,22 +178,27 @@
                 # Stores a list of elements that will be joined to make up a row
                 curr_op = [curr_line]
                 operation_data = self.__table[curr_line]
-
                 # Iterate through time/energy data for each approximation type corresponding
                 # to the current operation
                 for approx_type in operation_data:
                     op_time = operation_data[approx_type][TableGenerator.__time_col_name]
                     op_energy = operation_data[approx_type][TableGenerator.__energy_col_name]
-
                     curr_op.append(op_time)
                     curr_op.append(op_energy)
-
-                    if op_in_layer_count == 0:
-                        header.append("%s_time" % approx_type)
-                        header.append("%s_energy" % approx_type)
-
+                    if approx_type == "fp32_perf20":
+                        header.append("fp32_time")
+                        header.append("fp32_energy")
+                    elif approx_type == "fp16_perf20":
+                        header.append("fp16_time")
+                        header.append("fp16_energy")
+                    elif approx_type.find("f2h_perf20") != -1:
+                        header.append("f2h_time")
+                        header.append("f2h_energy")
+                    else:
+                        header.append("%s_time" % approx_type)
+                        header.append("%s_energy" % approx_type)
                 ops_in_layer.append(' '.join(curr_op))
-
                 # Getting all operation rows and then writing everything because
                 # calls to write() are slow (memory vs time tradeoff)
                 table_file.write("%s\n%s\n" % (' '.join(header), '\n'.join(ops_in_layer)))
@@ -229,8 +230,7 @@
         Returns: the approximation technique (ex: fp16)
         '''
-        approx_type_start_ind = results_filename.find(self.__network_name) \
-                    + len(self.__network_name) + 1 # + 1 to account for _ delimiter
+        approx_type_start_ind = results_filename.find("_", results_filename.find("_") + 1) + 1
         approx_type_end_ind = results_filename.find(".txt")

         return results_filename[approx_type_start_ind : approx_type_end_ind]
@@ -294,11 +294,15 @@
 if __name__ == "__main__":
-    if len(sys.argv) != 4:
-        print("python table_generator.py <binary dir path> <num itrs> <profiler bin path>")
+    if len(sys.argv) != 6:
+        print("python table_generator.py <network name> <binary dir path> <soc_ops file> <num itrs> <profiler bin path>")
+        print("soc ops file: ~/soc_simulator/<network name>_cifar10/<network name>_ops.txt")
         exit(1)
-    binary_dir_path = sys.argv[1]
-    num_iters = int(sys.argv[2])
-    profiler_binary_name = sys.argv[3]
-    table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
+
+    network_name = sys.argv[1]
+    binary_dir_path = sys.argv[2]
+    soc_ops_file = sys.argv[3]
+    num_iters = int(sys.argv[4])
+    profiler_binary_name = sys.argv[5]
+    table_gen = TableGenerator(network_name, binary_dir_path, soc_ops_file, num_iters, profiler_binary_name)
     table_gen.generate_table()
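table_generator.py now derives the approximation tag from everything past the filename's second underscore and stores f2h/h2f readings under a column key that carries the approximation suffix, which is the same key format driver.py's __quantize reconstructs. A short sketch with a hypothetical results filename:

# Hypothetical profiler-results filename: <network name>_<approx type>.txt,
# where the network name itself contains one underscore.
results_filename = "mobilenet_shallow_fp16_samp2.txt"

# __get_approximation_type: skip to just past the second "_" instead of
# searching for the network name.
start = results_filename.find("_", results_filename.find("_") + 1) + 1
approx_type = results_filename[start : results_filename.find(".txt")]
print(approx_type)                            # fp16_samp2

# __build_internal_table: f2h/h2f rows are keyed by conversion + approx suffix.
conversion_type = "f2h"
approx_type_no_fp_prefix = approx_type[5 : ]  # drop the "fp16_"/"fp32_" prefix
column = conversion_type + "_" + approx_type_no_fp_prefix
print(column + "_time")                       # f2h_samp2_time
print(column + "_energy")                     # f2h_samp2_energy
# driver.py's __quantize builds the same key: "f2h" + "_samp2_" + "time".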
diff --git a/llvm/projects/soc_simulator/vgg16_cifar10/vgg16_layers.txt b/llvm/projects/soc_simulator/vgg16_cifar10/vgg16_layers.txt
index 0865a73690fa33e4ddb14e9c962674088320071c..af6469192145b246beaec42cf42a6629e5ed1a93 100644
--- a/llvm/projects/soc_simulator/vgg16_cifar10/vgg16_layers.txt
+++ b/llvm/projects/soc_simulator/vgg16_cifar10/vgg16_layers.txt
@@ -1,15 +1,15 @@
-Conv1,5000,3,32,32,64,3,3,3,1,1
-Conv2,5000,64,32,32,64,64,3,3,1,1
-Conv3,5000,64,16,16,128,64,3,3,1,1
-Conv4,5000,128,16,16,128,128,3,3,1,1
-Conv5,5000,128,8,8,256,128,3,3,1,1
-Conv6,5000,256,8,8,256,256,3,3,1,1
-Conv7,5000,256,8,8,256,256,3,3,1,1
-Conv8,5000,256,4,4,512,256,3,3,1,1
-Conv9,5000,512,4,4,512,512,3,3,1,1
-Conv10,5000,512,4,4,512,512,3,3,1,1
-Conv11,5000,512,2,2,512,512,3,3,1,1
-Conv12,5000,512,2,2,512,512,3,3,1,1
-Conv13,5000,512,2,2,512,512,3,3,1,1
-FC1,5000,512,512,512
-FC2,5000,512,512,10
+Conv1,2000,3,32,32,64,3,3,3,1,1
+Conv2,2000,64,32,32,64,64,3,3,1,1
+Conv3,2000,64,16,16,128,64,3,3,1,1
+Conv4,2000,128,16,16,128,128,3,3,1,1
+Conv5,2000,128,8,8,256,128,3,3,1,1
+Conv6,2000,256,8,8,256,256,3,3,1,1
+Conv7,2000,256,8,8,256,256,3,3,1,1
+Conv8,2000,256,4,4,512,256,3,3,1,1
+Conv9,2000,512,4,4,512,512,3,3,1,1
+Conv10,2000,512,4,4,512,512,3,3,1,1
+Conv11,2000,512,2,2,512,512,3,3,1,1
+Conv12,2000,512,2,2,512,512,3,3,1,1
+Conv13,2000,512,2,2,512,512,3,3,1,1
+FC1,2000,512,512,512
+FC2,2000,512,512,10
diff --git a/llvm/projects/soc_simulator/vgg16_cifar100/vgg16_layers.txt b/llvm/projects/soc_simulator/vgg16_cifar100/vgg16_layers.txt
index 0865a73690fa33e4ddb14e9c962674088320071c..af6469192145b246beaec42cf42a6629e5ed1a93 100644
--- a/llvm/projects/soc_simulator/vgg16_cifar100/vgg16_layers.txt
+++ b/llvm/projects/soc_simulator/vgg16_cifar100/vgg16_layers.txt
@@ -1,15 +1,15 @@
-Conv1,5000,3,32,32,64,3,3,3,1,1
-Conv2,5000,64,32,32,64,64,3,3,1,1
-Conv3,5000,64,16,16,128,64,3,3,1,1
-Conv4,5000,128,16,16,128,128,3,3,1,1
-Conv5,5000,128,8,8,256,128,3,3,1,1
-Conv6,5000,256,8,8,256,256,3,3,1,1
-Conv7,5000,256,8,8,256,256,3,3,1,1
-Conv8,5000,256,4,4,512,256,3,3,1,1
-Conv9,5000,512,4,4,512,512,3,3,1,1
-Conv10,5000,512,4,4,512,512,3,3,1,1
-Conv11,5000,512,2,2,512,512,3,3,1,1
-Conv12,5000,512,2,2,512,512,3,3,1,1
-Conv13,5000,512,2,2,512,512,3,3,1,1
-FC1,5000,512,512,512
-FC2,5000,512,512,10
+Conv1,2000,3,32,32,64,3,3,3,1,1
+Conv2,2000,64,32,32,64,64,3,3,1,1
+Conv3,2000,64,16,16,128,64,3,3,1,1
+Conv4,2000,128,16,16,128,128,3,3,1,1
+Conv5,2000,128,8,8,256,128,3,3,1,1
+Conv6,2000,256,8,8,256,256,3,3,1,1
+Conv7,2000,256,8,8,256,256,3,3,1,1
+Conv8,2000,256,4,4,512,256,3,3,1,1
+Conv9,2000,512,4,4,512,512,3,3,1,1
+Conv10,2000,512,4,4,512,512,3,3,1,1
+Conv11,2000,512,2,2,512,512,3,3,1,1
+Conv12,2000,512,2,2,512,512,3,3,1,1
+Conv13,2000,512,2,2,512,512,3,3,1,1
+FC1,2000,512,512,512
+FC2,2000,512,512,10
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet_loop.cpp
index 50732550db8c8f02c940e485702c3253a7bb9760..a62b2ea33ad7cc2838e68965e2ce19f615555b93 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet_loop.cpp
@@ -437,7 +437,7 @@ int main(){
   args->dense_1_b_bytes = 0;

   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;

   // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/src/alexnet2_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/src/alexnet2_loop.cpp
index 91bf3b0c4523e7239d7f11f6ad350f9dbb454a91..e3c06325a67e648da47f710c72498308da1041be 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/src/alexnet2_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/src/alexnet2_loop.cpp
@@ -487,7 +487,7 @@ int main(){

   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;

   std::string input_path = dir_prefix + std::string("input.bin");
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/src/lenet_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/src/lenet_loop.cpp
index f3b20f01121f667562a2ef72d99e3f518ee84af6..88f302256933bae34169fa051559ef42adfd321b 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/src/lenet_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/src/lenet_loop.cpp
@@ -321,7 +321,7 @@ int main(){

   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;

   startMemTracking();
@@ -334,7 +334,7 @@ int main(){
     void* input = readInputBatch(input_path.c_str(), 0,
                                  start, end,
-                                 3, 32, 32);
+                                 1, 28, 28);

     args->input = input;
     args->input_bytes = 0;
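The *_loop.cpp hunks above halve the test set to 5000 inputs and, for lenet, correct the batch shape to MNIST's 1x28x28; the batching arithmetic they all share is just contiguous [start, end) slices. A Python stand-in for that loop (the real benchmarks call readInputBatch in C++):

# Batch slicing used by the *_loop.cpp benchmarks (hypothetical stand-in;
# the real loop calls readInputBatch(path, 0, start, end, C, H, W)).
batch_size = 500
test_input_size = 5000                        # halved from 10000 in this change
batch_count = test_input_size // batch_size   # -> 10 batches

for i in range(batch_count):
    start = i * batch_size
    end = (i + 1) * batch_size
    # lenet reads MNIST-shaped batches (1, 28, 28); the CIFAR nets read (3, 32, 32)
    print("batch %d: inputs [%d, %d)" % (i, start, end))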
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/Makefile
index b5cf2abaa0101bc0515bdd5e7658343b56307e6a..1b1941b74beea7046a2fdc419955571da9832ce1 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/Makefile
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/Makefile
@@ -1,6 +1,7 @@
 DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
 # NOTE: can configure build directory
-HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_hpvm/
+#HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_hpvm/
+HPVM_BUILD_DIR = $(LLVM_BUILD_ROOT)

 CC = $(HPVM_BUILD_DIR)/bin/clang++
 OPT = $(HPVM_BUILD_DIR)/bin/opt
@@ -15,6 +16,8 @@ APP = mobilenet
 TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
 TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
 TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a
+PROFILER_LIB_DIR = $(LLVM_SRC_ROOT)/projects/gpu_profiler/lib/libgpu_profiler.a
+SOC_SIMULATOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/soc_simulator/lib/libpromise_profiler.a
 TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a

 CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
@@ -27,10 +30,14 @@ HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
 VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG

-QUANT_FILE_PATH=/home/hsharif3/Gitlab/hpvm/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/quant_ranges.txt
+PROMISE_QUANT_FILE_PATH=$(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks/benchmarks/$(APP)/data/quant_ranges.txt

-VISC_OPTFLAGS2 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_PROMISE.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-promise -quantization-levels-filename=$(QUANT_FILE_PATH) -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG
+VISC_OPTFLAGS2 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_PROMISE.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-promise -quantization-levels-filename=$(PROMISE_QUANT_FILE_PATH) -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG
+WRAPPER_API_QUANT_FILE_PATH=$(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks/benchmarks/$(APP)/data/quant_ranges_rt.txt
+CONF_FILE_PATH=$(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks/benchmarks/$(APP)/data/tuner_confs_base.txt
+
+VISC_OPTFLAGS3 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_WrapperAPI.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-wrapperapi -quantization-levels-filename=$(WRAPPER_API_QUANT_FILE_PATH) -configuration-inputs-filename=$(CONF_FILE_PATH) -dfg2llvm-x86 -clearDFG

 TARGET = $(BUILD_DIR)/$(APP).opt.bc
@@ -45,18 +52,25 @@ default: $(BUILD_DIR) $(TARGET)

 $(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
 	$(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll
 	$(CC) $(CC_FLAGS) -emit-llvm src/$(APP)_promise.cpp -S -o $(BUILD_DIR)/$(APP)_promise.ll
+
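+	# The _loop variant goes through the same stages as the promise build:
+	# clang++ -emit-llvm, GenVISC, the wrapper-API pipeline (VISC_OPTFLAGS3),
+	# llvm-link with the VISC runtime, and a final link that now also pulls in
+	# libgpu_profiler.a and libpromise_profiler.a.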
$(CC) $(CC_FLAGS) -emit-llvm src/$(APP)_loop.cpp -S -o $(BUILD_DIR)/$(APP)_loop.ll $(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_promise.ll -S -o $(BUILD_DIR)/$(APP)_promise.visc.ll + $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_loop.ll -S -o $(BUILD_DIR)/$(APP)_loop.visc.ll $(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_cudnn.bc - $(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc + #$(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc + $(OPT) $(VISC_OPTFLAGS3) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_wrapperapi.bc + $(OPT) $(VISC_OPTFLAGS3) $(BUILD_DIR)/$(APP)_loop.visc.ll -o $(BUILD_DIR)/$(APP)_loop_wrapperapi.bc $(LLVM_LINK) $(BUILD_DIR)/$(APP)_cudnn.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_cudnn_linked.bc - $(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc - $(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS) - $(CC) $(BUILD_DIR)/$(APP)_promise_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS) - #$(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/lenet_tune $(LINKER_FLAGS) + #$(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc + $(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapperapi.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapperapi_linked.bc + $(LLVM_LINK) $(BUILD_DIR)/$(APP)_loop_wrapperapi.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked.bc + $(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS) + #$(CC) $(BUILD_DIR)/$(APP)_promise_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS) + $(CC) $(BUILD_DIR)/$(APP)_wrapperapi_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_wrapperapi_linked $(LINKER_FLAGS) + $(CC) $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked $(LINKER_FLAGS) $(BUILD_DIR): mkdir -p $@ diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/src/mobilenet_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/src/mobilenet_loop.cpp new file mode 100644 index 0000000000000000000000000000000000000000..096dcbce0b264a8deaa85e1225ed684172c29ab5 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/src/mobilenet_loop.cpp @@ -0,0 +1,2429 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <cstring> +#include <visc.h> +#include <tensorTypes.h> +#include <tensorUtils.h> + +void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + 
__visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_2_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 32); + __visc__return(2, r, (size_t) 0); +} + +void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_5_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_6_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_8_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 64); + __visc__return(2, r, (size_t) 0); +} + +void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_11_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_13_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_14_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void 
var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 128); + __visc__return(2, r, (size_t) 0); +} + +void var_16_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_17_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_20_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 128); + __visc__return(2, r, (size_t) 0); +} + +void var_22_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_23_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_26_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 256); + __visc__return(2, r, (size_t) 0); +} + +void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, 
void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_29_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_32_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_33_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 256); + __visc__return(2, r, (size_t) 0); +} + +void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_35_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_36_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_38_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_39_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_41_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 
0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_42_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_43_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_44_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_46_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_47_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_49_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_50_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_51_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_52_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_53_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_54_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_55_node(void* 
t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_56_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_57_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_58_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_59_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_60_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_61_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_62_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_63_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_64_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_65_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_66_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_67_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_68_node(void* t1, size_t 
bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_69_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 512); + __visc__return(2, r, (size_t) 0); +} + +void var_70_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_71_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_72_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_73_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_74_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_75_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 1024); + __visc__return(2, r, (size_t) 0); +} + +void var_76_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_77_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_78_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 0, 0, 1, 1); + __visc__return(2, r, (size_t) 0); +} + +void var_79_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2, void* t3, size_t bytes_t3, void* t4, size_t bytes_t4, void* t5, size_t bytes_t5) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(5, t1, t2, t3, t4, t5, 0); + + void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001); + __visc__return(2, r, (size_t) 0); +} + +void var_80_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} + +void var_81_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_pool_mean(t1, 2, 2, 0, 0, 2, 2); + __visc__return(2, 
r, (size_t) 0); +} + +void var_82_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_mul(t1, t2); + __visc__return(2, r, (size_t) 0); +} + +void var_83_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} + +void var_84_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_softmax(t1); + __visc__return(2, r, (size_t) 0); +} + +void root(void* input, size_t input_bytes, + void* conv2d_1_w, size_t conv2d_1_w_bytes, + void* batch_normalization_1_gamma, size_t batch_normalization_1_gamma_bytes, + void* batch_normalization_1_beta, size_t batch_normalization_1_beta_bytes, + void* batch_normalization_1_mean, size_t batch_normalization_1_mean_bytes, + void* batch_normalization_1_variance, size_t batch_normalization_1_variance_bytes, + void* depthwise_conv2d_1_w, size_t depthwise_conv2d_1_w_bytes, + void* batch_normalization_2_gamma, size_t batch_normalization_2_gamma_bytes, + void* batch_normalization_2_beta, size_t batch_normalization_2_beta_bytes, + void* batch_normalization_2_mean, size_t batch_normalization_2_mean_bytes, + void* batch_normalization_2_variance, size_t batch_normalization_2_variance_bytes, + void* conv2d_2_w, size_t conv2d_2_w_bytes, + void* batch_normalization_3_gamma, size_t batch_normalization_3_gamma_bytes, + void* batch_normalization_3_beta, size_t batch_normalization_3_beta_bytes, + void* batch_normalization_3_mean, size_t batch_normalization_3_mean_bytes, + void* batch_normalization_3_variance, size_t batch_normalization_3_variance_bytes, + void* depthwise_conv2d_2_w, size_t depthwise_conv2d_2_w_bytes, + void* batch_normalization_4_gamma, size_t batch_normalization_4_gamma_bytes, + void* batch_normalization_4_beta, size_t batch_normalization_4_beta_bytes, + void* batch_normalization_4_mean, size_t batch_normalization_4_mean_bytes, + void* batch_normalization_4_variance, size_t batch_normalization_4_variance_bytes, + void* conv2d_3_w, size_t conv2d_3_w_bytes, + void* batch_normalization_5_gamma, size_t batch_normalization_5_gamma_bytes, + void* batch_normalization_5_beta, size_t batch_normalization_5_beta_bytes, + void* batch_normalization_5_mean, size_t batch_normalization_5_mean_bytes, + void* batch_normalization_5_variance, size_t batch_normalization_5_variance_bytes, + void* depthwise_conv2d_3_w, size_t depthwise_conv2d_3_w_bytes, + void* batch_normalization_6_gamma, size_t batch_normalization_6_gamma_bytes, + void* batch_normalization_6_beta, size_t batch_normalization_6_beta_bytes, + void* batch_normalization_6_mean, size_t batch_normalization_6_mean_bytes, + void* batch_normalization_6_variance, size_t batch_normalization_6_variance_bytes, + void* conv2d_4_w, size_t conv2d_4_w_bytes, + void* batch_normalization_7_gamma, size_t batch_normalization_7_gamma_bytes, + void* batch_normalization_7_beta, size_t batch_normalization_7_beta_bytes, + void* batch_normalization_7_mean, size_t batch_normalization_7_mean_bytes, + void* batch_normalization_7_variance, size_t batch_normalization_7_variance_bytes, + void* depthwise_conv2d_4_w, size_t depthwise_conv2d_4_w_bytes, + void* batch_normalization_8_gamma, size_t batch_normalization_8_gamma_bytes, + void* batch_normalization_8_beta, size_t batch_normalization_8_beta_bytes, + void* 
batch_normalization_8_mean, size_t batch_normalization_8_mean_bytes, + void* batch_normalization_8_variance, size_t batch_normalization_8_variance_bytes, + void* conv2d_5_w, size_t conv2d_5_w_bytes, + void* batch_normalization_9_gamma, size_t batch_normalization_9_gamma_bytes, + void* batch_normalization_9_beta, size_t batch_normalization_9_beta_bytes, + void* batch_normalization_9_mean, size_t batch_normalization_9_mean_bytes, + void* batch_normalization_9_variance, size_t batch_normalization_9_variance_bytes, + void* depthwise_conv2d_5_w, size_t depthwise_conv2d_5_w_bytes, + void* batch_normalization_10_gamma, size_t batch_normalization_10_gamma_bytes, + void* batch_normalization_10_beta, size_t batch_normalization_10_beta_bytes, + void* batch_normalization_10_mean, size_t batch_normalization_10_mean_bytes, + void* batch_normalization_10_variance, size_t batch_normalization_10_variance_bytes, + void* conv2d_6_w, size_t conv2d_6_w_bytes, + void* batch_normalization_11_gamma, size_t batch_normalization_11_gamma_bytes, + void* batch_normalization_11_beta, size_t batch_normalization_11_beta_bytes, + void* batch_normalization_11_mean, size_t batch_normalization_11_mean_bytes, + void* batch_normalization_11_variance, size_t batch_normalization_11_variance_bytes, + void* depthwise_conv2d_6_w, size_t depthwise_conv2d_6_w_bytes, + void* batch_normalization_12_gamma, size_t batch_normalization_12_gamma_bytes, + void* batch_normalization_12_beta, size_t batch_normalization_12_beta_bytes, + void* batch_normalization_12_mean, size_t batch_normalization_12_mean_bytes, + void* batch_normalization_12_variance, size_t batch_normalization_12_variance_bytes, + void* conv2d_7_w, size_t conv2d_7_w_bytes, + void* batch_normalization_13_gamma, size_t batch_normalization_13_gamma_bytes, + void* batch_normalization_13_beta, size_t batch_normalization_13_beta_bytes, + void* batch_normalization_13_mean, size_t batch_normalization_13_mean_bytes, + void* batch_normalization_13_variance, size_t batch_normalization_13_variance_bytes, + void* depthwise_conv2d_7_w, size_t depthwise_conv2d_7_w_bytes, + void* batch_normalization_14_gamma, size_t batch_normalization_14_gamma_bytes, + void* batch_normalization_14_beta, size_t batch_normalization_14_beta_bytes, + void* batch_normalization_14_mean, size_t batch_normalization_14_mean_bytes, + void* batch_normalization_14_variance, size_t batch_normalization_14_variance_bytes, + void* conv2d_8_w, size_t conv2d_8_w_bytes, + void* batch_normalization_15_gamma, size_t batch_normalization_15_gamma_bytes, + void* batch_normalization_15_beta, size_t batch_normalization_15_beta_bytes, + void* batch_normalization_15_mean, size_t batch_normalization_15_mean_bytes, + void* batch_normalization_15_variance, size_t batch_normalization_15_variance_bytes, + void* depthwise_conv2d_8_w, size_t depthwise_conv2d_8_w_bytes, + void* batch_normalization_16_gamma, size_t batch_normalization_16_gamma_bytes, + void* batch_normalization_16_beta, size_t batch_normalization_16_beta_bytes, + void* batch_normalization_16_mean, size_t batch_normalization_16_mean_bytes, + void* batch_normalization_16_variance, size_t batch_normalization_16_variance_bytes, + void* conv2d_9_w, size_t conv2d_9_w_bytes, + void* batch_normalization_17_gamma, size_t batch_normalization_17_gamma_bytes, + void* batch_normalization_17_beta, size_t batch_normalization_17_beta_bytes, + void* batch_normalization_17_mean, size_t batch_normalization_17_mean_bytes, + void* batch_normalization_17_variance, size_t 
batch_normalization_17_variance_bytes, + void* depthwise_conv2d_9_w, size_t depthwise_conv2d_9_w_bytes, + void* batch_normalization_18_gamma, size_t batch_normalization_18_gamma_bytes, + void* batch_normalization_18_beta, size_t batch_normalization_18_beta_bytes, + void* batch_normalization_18_mean, size_t batch_normalization_18_mean_bytes, + void* batch_normalization_18_variance, size_t batch_normalization_18_variance_bytes, + void* conv2d_10_w, size_t conv2d_10_w_bytes, + void* batch_normalization_19_gamma, size_t batch_normalization_19_gamma_bytes, + void* batch_normalization_19_beta, size_t batch_normalization_19_beta_bytes, + void* batch_normalization_19_mean, size_t batch_normalization_19_mean_bytes, + void* batch_normalization_19_variance, size_t batch_normalization_19_variance_bytes, + void* depthwise_conv2d_10_w, size_t depthwise_conv2d_10_w_bytes, + void* batch_normalization_20_gamma, size_t batch_normalization_20_gamma_bytes, + void* batch_normalization_20_beta, size_t batch_normalization_20_beta_bytes, + void* batch_normalization_20_mean, size_t batch_normalization_20_mean_bytes, + void* batch_normalization_20_variance, size_t batch_normalization_20_variance_bytes, + void* conv2d_11_w, size_t conv2d_11_w_bytes, + void* batch_normalization_21_gamma, size_t batch_normalization_21_gamma_bytes, + void* batch_normalization_21_beta, size_t batch_normalization_21_beta_bytes, + void* batch_normalization_21_mean, size_t batch_normalization_21_mean_bytes, + void* batch_normalization_21_variance, size_t batch_normalization_21_variance_bytes, + void* depthwise_conv2d_11_w, size_t depthwise_conv2d_11_w_bytes, + void* batch_normalization_22_gamma, size_t batch_normalization_22_gamma_bytes, + void* batch_normalization_22_beta, size_t batch_normalization_22_beta_bytes, + void* batch_normalization_22_mean, size_t batch_normalization_22_mean_bytes, + void* batch_normalization_22_variance, size_t batch_normalization_22_variance_bytes, + void* conv2d_12_w, size_t conv2d_12_w_bytes, + void* batch_normalization_23_gamma, size_t batch_normalization_23_gamma_bytes, + void* batch_normalization_23_beta, size_t batch_normalization_23_beta_bytes, + void* batch_normalization_23_mean, size_t batch_normalization_23_mean_bytes, + void* batch_normalization_23_variance, size_t batch_normalization_23_variance_bytes, + void* depthwise_conv2d_12_w, size_t depthwise_conv2d_12_w_bytes, + void* batch_normalization_24_gamma, size_t batch_normalization_24_gamma_bytes, + void* batch_normalization_24_beta, size_t batch_normalization_24_beta_bytes, + void* batch_normalization_24_mean, size_t batch_normalization_24_mean_bytes, + void* batch_normalization_24_variance, size_t batch_normalization_24_variance_bytes, + void* conv2d_13_w, size_t conv2d_13_w_bytes, + void* batch_normalization_25_gamma, size_t batch_normalization_25_gamma_bytes, + void* batch_normalization_25_beta, size_t batch_normalization_25_beta_bytes, + void* batch_normalization_25_mean, size_t batch_normalization_25_mean_bytes, + void* batch_normalization_25_variance, size_t batch_normalization_25_variance_bytes, + void* depthwise_conv2d_13_w, size_t depthwise_conv2d_13_w_bytes, + void* batch_normalization_26_gamma, size_t batch_normalization_26_gamma_bytes, + void* batch_normalization_26_beta, size_t batch_normalization_26_beta_bytes, + void* batch_normalization_26_mean, size_t batch_normalization_26_mean_bytes, + void* batch_normalization_26_variance, size_t batch_normalization_26_variance_bytes, + void* conv2d_14_w, size_t conv2d_14_w_bytes, + void* 
batch_normalization_27_gamma, size_t batch_normalization_27_gamma_bytes, + void* batch_normalization_27_beta, size_t batch_normalization_27_beta_bytes, + void* batch_normalization_27_mean, size_t batch_normalization_27_mean_bytes, + void* batch_normalization_27_variance, size_t batch_normalization_27_variance_bytes, + void* dense_1_w, size_t dense_1_w_bytes, + void* dense_1_b, size_t dense_1_b_bytes){ + + + __visc__hint(visc::CPU_TARGET); + __visc__attributes(138, input, conv2d_1_w, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, depthwise_conv2d_1_w, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, conv2d_2_w, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, depthwise_conv2d_2_w, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, conv2d_3_w, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, depthwise_conv2d_3_w, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, conv2d_4_w, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, depthwise_conv2d_4_w, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, conv2d_5_w, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, depthwise_conv2d_5_w, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, conv2d_6_w, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, depthwise_conv2d_6_w, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, conv2d_7_w, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, depthwise_conv2d_7_w, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, conv2d_8_w, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, depthwise_conv2d_8_w, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, conv2d_9_w, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, depthwise_conv2d_9_w, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, conv2d_10_w, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, depthwise_conv2d_10_w, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, conv2d_11_w, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, depthwise_conv2d_11_w, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, conv2d_12_w, batch_normalization_23_gamma, batch_normalization_23_beta, 
batch_normalization_23_mean, batch_normalization_23_variance, depthwise_conv2d_12_w, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, conv2d_13_w, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, depthwise_conv2d_13_w, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, conv2d_14_w, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, dense_1_w, dense_1_b, 0); + + + void* var_0 = __visc__createNodeND(0, var_0_node); + + __visc__bindIn(var_0, 0, 0, 0); + __visc__bindIn(var_0, 1, 1, 0); + __visc__bindIn(var_0, 2, 2, 0); + __visc__bindIn(var_0, 3, 3, 0); + + void* var_1 = __visc__createNodeND(0, var_1_node); + + __visc__edge(var_0, var_1, 1, 0, 0, 0); + __visc__edge(var_0, var_1, 1, 1, 1, 0); + __visc__bindIn(var_1, 4, 2, 0); + __visc__bindIn(var_1, 5, 3, 0); + __visc__bindIn(var_1, 6, 4, 0); + __visc__bindIn(var_1, 7, 5, 0); + __visc__bindIn(var_1, 8, 6, 0); + __visc__bindIn(var_1, 9, 7, 0); + __visc__bindIn(var_1, 10, 8, 0); + __visc__bindIn(var_1, 11, 9, 0); + + void* var_2 = __visc__createNodeND(0, var_2_node); + + __visc__edge(var_1, var_2, 1, 0, 0, 0); + __visc__edge(var_1, var_2, 1, 1, 1, 0); + + void* var_3 = __visc__createNodeND(0, var_3_node); + + __visc__edge(var_2, var_3, 1, 0, 0, 0); + __visc__edge(var_2, var_3, 1, 1, 1, 0); + __visc__bindIn(var_3, 12, 2, 0); + __visc__bindIn(var_3, 13, 3, 0); + + void* var_4 = __visc__createNodeND(0, var_4_node); + + __visc__edge(var_3, var_4, 1, 0, 0, 0); + __visc__edge(var_3, var_4, 1, 1, 1, 0); + __visc__bindIn(var_4, 14, 2, 0); + __visc__bindIn(var_4, 15, 3, 0); + __visc__bindIn(var_4, 16, 4, 0); + __visc__bindIn(var_4, 17, 5, 0); + __visc__bindIn(var_4, 18, 6, 0); + __visc__bindIn(var_4, 19, 7, 0); + __visc__bindIn(var_4, 20, 8, 0); + __visc__bindIn(var_4, 21, 9, 0); + + void* var_5 = __visc__createNodeND(0, var_5_node); + + __visc__edge(var_4, var_5, 1, 0, 0, 0); + __visc__edge(var_4, var_5, 1, 1, 1, 0); + + void* var_6 = __visc__createNodeND(0, var_6_node); + + __visc__edge(var_5, var_6, 1, 0, 0, 0); + __visc__edge(var_5, var_6, 1, 1, 1, 0); + __visc__bindIn(var_6, 22, 2, 0); + __visc__bindIn(var_6, 23, 3, 0); + + void* var_7 = __visc__createNodeND(0, var_7_node); + + __visc__edge(var_6, var_7, 1, 0, 0, 0); + __visc__edge(var_6, var_7, 1, 1, 1, 0); + __visc__bindIn(var_7, 24, 2, 0); + __visc__bindIn(var_7, 25, 3, 0); + __visc__bindIn(var_7, 26, 4, 0); + __visc__bindIn(var_7, 27, 5, 0); + __visc__bindIn(var_7, 28, 6, 0); + __visc__bindIn(var_7, 29, 7, 0); + __visc__bindIn(var_7, 30, 8, 0); + __visc__bindIn(var_7, 31, 9, 0); + + void* var_8 = __visc__createNodeND(0, var_8_node); + + __visc__edge(var_7, var_8, 1, 0, 0, 0); + __visc__edge(var_7, var_8, 1, 1, 1, 0); + + void* var_9 = __visc__createNodeND(0, var_9_node); + + __visc__edge(var_8, var_9, 1, 0, 0, 0); + __visc__edge(var_8, var_9, 1, 1, 1, 0); + __visc__bindIn(var_9, 32, 2, 0); + __visc__bindIn(var_9, 33, 3, 0); + + void* var_10 = __visc__createNodeND(0, var_10_node); + + __visc__edge(var_9, var_10, 1, 0, 0, 0); + __visc__edge(var_9, var_10, 1, 1, 1, 0); + __visc__bindIn(var_10, 34, 2, 0); + __visc__bindIn(var_10, 35, 3, 0); + __visc__bindIn(var_10, 36, 4, 0); + __visc__bindIn(var_10, 37, 5, 0); + __visc__bindIn(var_10, 38, 6, 0); + __visc__bindIn(var_10, 39, 7, 0); + __visc__bindIn(var_10, 40, 8, 0); + 
__visc__bindIn(var_10, 41, 9, 0); + + void* var_11 = __visc__createNodeND(0, var_11_node); + + __visc__edge(var_10, var_11, 1, 0, 0, 0); + __visc__edge(var_10, var_11, 1, 1, 1, 0); + + void* var_12 = __visc__createNodeND(0, var_12_node); + + __visc__edge(var_11, var_12, 1, 0, 0, 0); + __visc__edge(var_11, var_12, 1, 1, 1, 0); + __visc__bindIn(var_12, 42, 2, 0); + __visc__bindIn(var_12, 43, 3, 0); + + void* var_13 = __visc__createNodeND(0, var_13_node); + + __visc__edge(var_12, var_13, 1, 0, 0, 0); + __visc__edge(var_12, var_13, 1, 1, 1, 0); + __visc__bindIn(var_13, 44, 2, 0); + __visc__bindIn(var_13, 45, 3, 0); + __visc__bindIn(var_13, 46, 4, 0); + __visc__bindIn(var_13, 47, 5, 0); + __visc__bindIn(var_13, 48, 6, 0); + __visc__bindIn(var_13, 49, 7, 0); + __visc__bindIn(var_13, 50, 8, 0); + __visc__bindIn(var_13, 51, 9, 0); + + void* var_14 = __visc__createNodeND(0, var_14_node); + + __visc__edge(var_13, var_14, 1, 0, 0, 0); + __visc__edge(var_13, var_14, 1, 1, 1, 0); + + void* var_15 = __visc__createNodeND(0, var_15_node); + + __visc__edge(var_14, var_15, 1, 0, 0, 0); + __visc__edge(var_14, var_15, 1, 1, 1, 0); + __visc__bindIn(var_15, 52, 2, 0); + __visc__bindIn(var_15, 53, 3, 0); + + void* var_16 = __visc__createNodeND(0, var_16_node); + + __visc__edge(var_15, var_16, 1, 0, 0, 0); + __visc__edge(var_15, var_16, 1, 1, 1, 0); + __visc__bindIn(var_16, 54, 2, 0); + __visc__bindIn(var_16, 55, 3, 0); + __visc__bindIn(var_16, 56, 4, 0); + __visc__bindIn(var_16, 57, 5, 0); + __visc__bindIn(var_16, 58, 6, 0); + __visc__bindIn(var_16, 59, 7, 0); + __visc__bindIn(var_16, 60, 8, 0); + __visc__bindIn(var_16, 61, 9, 0); + + void* var_17 = __visc__createNodeND(0, var_17_node); + + __visc__edge(var_16, var_17, 1, 0, 0, 0); + __visc__edge(var_16, var_17, 1, 1, 1, 0); + + void* var_18 = __visc__createNodeND(0, var_18_node); + + __visc__edge(var_17, var_18, 1, 0, 0, 0); + __visc__edge(var_17, var_18, 1, 1, 1, 0); + __visc__bindIn(var_18, 62, 2, 0); + __visc__bindIn(var_18, 63, 3, 0); + + void* var_19 = __visc__createNodeND(0, var_19_node); + + __visc__edge(var_18, var_19, 1, 0, 0, 0); + __visc__edge(var_18, var_19, 1, 1, 1, 0); + __visc__bindIn(var_19, 64, 2, 0); + __visc__bindIn(var_19, 65, 3, 0); + __visc__bindIn(var_19, 66, 4, 0); + __visc__bindIn(var_19, 67, 5, 0); + __visc__bindIn(var_19, 68, 6, 0); + __visc__bindIn(var_19, 69, 7, 0); + __visc__bindIn(var_19, 70, 8, 0); + __visc__bindIn(var_19, 71, 9, 0); + + void* var_20 = __visc__createNodeND(0, var_20_node); + + __visc__edge(var_19, var_20, 1, 0, 0, 0); + __visc__edge(var_19, var_20, 1, 1, 1, 0); + + void* var_21 = __visc__createNodeND(0, var_21_node); + + __visc__edge(var_20, var_21, 1, 0, 0, 0); + __visc__edge(var_20, var_21, 1, 1, 1, 0); + __visc__bindIn(var_21, 72, 2, 0); + __visc__bindIn(var_21, 73, 3, 0); + + void* var_22 = __visc__createNodeND(0, var_22_node); + + __visc__edge(var_21, var_22, 1, 0, 0, 0); + __visc__edge(var_21, var_22, 1, 1, 1, 0); + __visc__bindIn(var_22, 74, 2, 0); + __visc__bindIn(var_22, 75, 3, 0); + __visc__bindIn(var_22, 76, 4, 0); + __visc__bindIn(var_22, 77, 5, 0); + __visc__bindIn(var_22, 78, 6, 0); + __visc__bindIn(var_22, 79, 7, 0); + __visc__bindIn(var_22, 80, 8, 0); + __visc__bindIn(var_22, 81, 9, 0); + + void* var_23 = __visc__createNodeND(0, var_23_node); + + __visc__edge(var_22, var_23, 1, 0, 0, 0); + __visc__edge(var_22, var_23, 1, 1, 1, 0); + + void* var_24 = __visc__createNodeND(0, var_24_node); + + __visc__edge(var_23, var_24, 1, 0, 0, 0); + __visc__edge(var_23, var_24, 1, 1, 1, 0); + 
__visc__bindIn(var_24, 82, 2, 0); + __visc__bindIn(var_24, 83, 3, 0); + + void* var_25 = __visc__createNodeND(0, var_25_node); + + __visc__edge(var_24, var_25, 1, 0, 0, 0); + __visc__edge(var_24, var_25, 1, 1, 1, 0); + __visc__bindIn(var_25, 84, 2, 0); + __visc__bindIn(var_25, 85, 3, 0); + __visc__bindIn(var_25, 86, 4, 0); + __visc__bindIn(var_25, 87, 5, 0); + __visc__bindIn(var_25, 88, 6, 0); + __visc__bindIn(var_25, 89, 7, 0); + __visc__bindIn(var_25, 90, 8, 0); + __visc__bindIn(var_25, 91, 9, 0); + + void* var_26 = __visc__createNodeND(0, var_26_node); + + __visc__edge(var_25, var_26, 1, 0, 0, 0); + __visc__edge(var_25, var_26, 1, 1, 1, 0); + + void* var_27 = __visc__createNodeND(0, var_27_node); + + __visc__edge(var_26, var_27, 1, 0, 0, 0); + __visc__edge(var_26, var_27, 1, 1, 1, 0); + __visc__bindIn(var_27, 92, 2, 0); + __visc__bindIn(var_27, 93, 3, 0); + + void* var_28 = __visc__createNodeND(0, var_28_node); + + __visc__edge(var_27, var_28, 1, 0, 0, 0); + __visc__edge(var_27, var_28, 1, 1, 1, 0); + __visc__bindIn(var_28, 94, 2, 0); + __visc__bindIn(var_28, 95, 3, 0); + __visc__bindIn(var_28, 96, 4, 0); + __visc__bindIn(var_28, 97, 5, 0); + __visc__bindIn(var_28, 98, 6, 0); + __visc__bindIn(var_28, 99, 7, 0); + __visc__bindIn(var_28, 100, 8, 0); + __visc__bindIn(var_28, 101, 9, 0); + + void* var_29 = __visc__createNodeND(0, var_29_node); + + __visc__edge(var_28, var_29, 1, 0, 0, 0); + __visc__edge(var_28, var_29, 1, 1, 1, 0); + + void* var_30 = __visc__createNodeND(0, var_30_node); + + __visc__edge(var_29, var_30, 1, 0, 0, 0); + __visc__edge(var_29, var_30, 1, 1, 1, 0); + __visc__bindIn(var_30, 102, 2, 0); + __visc__bindIn(var_30, 103, 3, 0); + + void* var_31 = __visc__createNodeND(0, var_31_node); + + __visc__edge(var_30, var_31, 1, 0, 0, 0); + __visc__edge(var_30, var_31, 1, 1, 1, 0); + __visc__bindIn(var_31, 104, 2, 0); + __visc__bindIn(var_31, 105, 3, 0); + __visc__bindIn(var_31, 106, 4, 0); + __visc__bindIn(var_31, 107, 5, 0); + __visc__bindIn(var_31, 108, 6, 0); + __visc__bindIn(var_31, 109, 7, 0); + __visc__bindIn(var_31, 110, 8, 0); + __visc__bindIn(var_31, 111, 9, 0); + + void* var_32 = __visc__createNodeND(0, var_32_node); + + __visc__edge(var_31, var_32, 1, 0, 0, 0); + __visc__edge(var_31, var_32, 1, 1, 1, 0); + + void* var_33 = __visc__createNodeND(0, var_33_node); + + __visc__edge(var_32, var_33, 1, 0, 0, 0); + __visc__edge(var_32, var_33, 1, 1, 1, 0); + __visc__bindIn(var_33, 112, 2, 0); + __visc__bindIn(var_33, 113, 3, 0); + + void* var_34 = __visc__createNodeND(0, var_34_node); + + __visc__edge(var_33, var_34, 1, 0, 0, 0); + __visc__edge(var_33, var_34, 1, 1, 1, 0); + __visc__bindIn(var_34, 114, 2, 0); + __visc__bindIn(var_34, 115, 3, 0); + __visc__bindIn(var_34, 116, 4, 0); + __visc__bindIn(var_34, 117, 5, 0); + __visc__bindIn(var_34, 118, 6, 0); + __visc__bindIn(var_34, 119, 7, 0); + __visc__bindIn(var_34, 120, 8, 0); + __visc__bindIn(var_34, 121, 9, 0); + + void* var_35 = __visc__createNodeND(0, var_35_node); + + __visc__edge(var_34, var_35, 1, 0, 0, 0); + __visc__edge(var_34, var_35, 1, 1, 1, 0); + + void* var_36 = __visc__createNodeND(0, var_36_node); + + __visc__edge(var_35, var_36, 1, 0, 0, 0); + __visc__edge(var_35, var_36, 1, 1, 1, 0); + __visc__bindIn(var_36, 122, 2, 0); + __visc__bindIn(var_36, 123, 3, 0); + + void* var_37 = __visc__createNodeND(0, var_37_node); + + __visc__edge(var_36, var_37, 1, 0, 0, 0); + __visc__edge(var_36, var_37, 1, 1, 1, 0); + __visc__bindIn(var_37, 124, 2, 0); + __visc__bindIn(var_37, 125, 3, 0); + __visc__bindIn(var_37, 126, 4, 
0); + __visc__bindIn(var_37, 127, 5, 0); + __visc__bindIn(var_37, 128, 6, 0); + __visc__bindIn(var_37, 129, 7, 0); + __visc__bindIn(var_37, 130, 8, 0); + __visc__bindIn(var_37, 131, 9, 0); + + void* var_38 = __visc__createNodeND(0, var_38_node); + + __visc__edge(var_37, var_38, 1, 0, 0, 0); + __visc__edge(var_37, var_38, 1, 1, 1, 0); + + void* var_39 = __visc__createNodeND(0, var_39_node); + + __visc__edge(var_38, var_39, 1, 0, 0, 0); + __visc__edge(var_38, var_39, 1, 1, 1, 0); + __visc__bindIn(var_39, 132, 2, 0); + __visc__bindIn(var_39, 133, 3, 0); + + void* var_40 = __visc__createNodeND(0, var_40_node); + + __visc__edge(var_39, var_40, 1, 0, 0, 0); + __visc__edge(var_39, var_40, 1, 1, 1, 0); + __visc__bindIn(var_40, 134, 2, 0); + __visc__bindIn(var_40, 135, 3, 0); + __visc__bindIn(var_40, 136, 4, 0); + __visc__bindIn(var_40, 137, 5, 0); + __visc__bindIn(var_40, 138, 6, 0); + __visc__bindIn(var_40, 139, 7, 0); + __visc__bindIn(var_40, 140, 8, 0); + __visc__bindIn(var_40, 141, 9, 0); + + void* var_41 = __visc__createNodeND(0, var_41_node); + + __visc__edge(var_40, var_41, 1, 0, 0, 0); + __visc__edge(var_40, var_41, 1, 1, 1, 0); + + void* var_42 = __visc__createNodeND(0, var_42_node); + + __visc__edge(var_41, var_42, 1, 0, 0, 0); + __visc__edge(var_41, var_42, 1, 1, 1, 0); + __visc__bindIn(var_42, 142, 2, 0); + __visc__bindIn(var_42, 143, 3, 0); + + void* var_43 = __visc__createNodeND(0, var_43_node); + + __visc__edge(var_42, var_43, 1, 0, 0, 0); + __visc__edge(var_42, var_43, 1, 1, 1, 0); + __visc__bindIn(var_43, 144, 2, 0); + __visc__bindIn(var_43, 145, 3, 0); + __visc__bindIn(var_43, 146, 4, 0); + __visc__bindIn(var_43, 147, 5, 0); + __visc__bindIn(var_43, 148, 6, 0); + __visc__bindIn(var_43, 149, 7, 0); + __visc__bindIn(var_43, 150, 8, 0); + __visc__bindIn(var_43, 151, 9, 0); + + void* var_44 = __visc__createNodeND(0, var_44_node); + + __visc__edge(var_43, var_44, 1, 0, 0, 0); + __visc__edge(var_43, var_44, 1, 1, 1, 0); + + void* var_45 = __visc__createNodeND(0, var_45_node); + + __visc__edge(var_44, var_45, 1, 0, 0, 0); + __visc__edge(var_44, var_45, 1, 1, 1, 0); + __visc__bindIn(var_45, 152, 2, 0); + __visc__bindIn(var_45, 153, 3, 0); + + void* var_46 = __visc__createNodeND(0, var_46_node); + + __visc__edge(var_45, var_46, 1, 0, 0, 0); + __visc__edge(var_45, var_46, 1, 1, 1, 0); + __visc__bindIn(var_46, 154, 2, 0); + __visc__bindIn(var_46, 155, 3, 0); + __visc__bindIn(var_46, 156, 4, 0); + __visc__bindIn(var_46, 157, 5, 0); + __visc__bindIn(var_46, 158, 6, 0); + __visc__bindIn(var_46, 159, 7, 0); + __visc__bindIn(var_46, 160, 8, 0); + __visc__bindIn(var_46, 161, 9, 0); + + void* var_47 = __visc__createNodeND(0, var_47_node); + + __visc__edge(var_46, var_47, 1, 0, 0, 0); + __visc__edge(var_46, var_47, 1, 1, 1, 0); + + void* var_48 = __visc__createNodeND(0, var_48_node); + + __visc__edge(var_47, var_48, 1, 0, 0, 0); + __visc__edge(var_47, var_48, 1, 1, 1, 0); + __visc__bindIn(var_48, 162, 2, 0); + __visc__bindIn(var_48, 163, 3, 0); + + void* var_49 = __visc__createNodeND(0, var_49_node); + + __visc__edge(var_48, var_49, 1, 0, 0, 0); + __visc__edge(var_48, var_49, 1, 1, 1, 0); + __visc__bindIn(var_49, 164, 2, 0); + __visc__bindIn(var_49, 165, 3, 0); + __visc__bindIn(var_49, 166, 4, 0); + __visc__bindIn(var_49, 167, 5, 0); + __visc__bindIn(var_49, 168, 6, 0); + __visc__bindIn(var_49, 169, 7, 0); + __visc__bindIn(var_49, 170, 8, 0); + __visc__bindIn(var_49, 171, 9, 0); + + void* var_50 = __visc__createNodeND(0, var_50_node); + + __visc__edge(var_49, var_50, 1, 0, 0, 0); + 
__visc__edge(var_49, var_50, 1, 1, 1, 0); + + void* var_51 = __visc__createNodeND(0, var_51_node); + + __visc__edge(var_50, var_51, 1, 0, 0, 0); + __visc__edge(var_50, var_51, 1, 1, 1, 0); + __visc__bindIn(var_51, 172, 2, 0); + __visc__bindIn(var_51, 173, 3, 0); + + void* var_52 = __visc__createNodeND(0, var_52_node); + + __visc__edge(var_51, var_52, 1, 0, 0, 0); + __visc__edge(var_51, var_52, 1, 1, 1, 0); + __visc__bindIn(var_52, 174, 2, 0); + __visc__bindIn(var_52, 175, 3, 0); + __visc__bindIn(var_52, 176, 4, 0); + __visc__bindIn(var_52, 177, 5, 0); + __visc__bindIn(var_52, 178, 6, 0); + __visc__bindIn(var_52, 179, 7, 0); + __visc__bindIn(var_52, 180, 8, 0); + __visc__bindIn(var_52, 181, 9, 0); + + void* var_53 = __visc__createNodeND(0, var_53_node); + + __visc__edge(var_52, var_53, 1, 0, 0, 0); + __visc__edge(var_52, var_53, 1, 1, 1, 0); + + void* var_54 = __visc__createNodeND(0, var_54_node); + + __visc__edge(var_53, var_54, 1, 0, 0, 0); + __visc__edge(var_53, var_54, 1, 1, 1, 0); + __visc__bindIn(var_54, 182, 2, 0); + __visc__bindIn(var_54, 183, 3, 0); + + void* var_55 = __visc__createNodeND(0, var_55_node); + + __visc__edge(var_54, var_55, 1, 0, 0, 0); + __visc__edge(var_54, var_55, 1, 1, 1, 0); + __visc__bindIn(var_55, 184, 2, 0); + __visc__bindIn(var_55, 185, 3, 0); + __visc__bindIn(var_55, 186, 4, 0); + __visc__bindIn(var_55, 187, 5, 0); + __visc__bindIn(var_55, 188, 6, 0); + __visc__bindIn(var_55, 189, 7, 0); + __visc__bindIn(var_55, 190, 8, 0); + __visc__bindIn(var_55, 191, 9, 0); + + void* var_56 = __visc__createNodeND(0, var_56_node); + + __visc__edge(var_55, var_56, 1, 0, 0, 0); + __visc__edge(var_55, var_56, 1, 1, 1, 0); + + void* var_57 = __visc__createNodeND(0, var_57_node); + + __visc__edge(var_56, var_57, 1, 0, 0, 0); + __visc__edge(var_56, var_57, 1, 1, 1, 0); + __visc__bindIn(var_57, 192, 2, 0); + __visc__bindIn(var_57, 193, 3, 0); + + void* var_58 = __visc__createNodeND(0, var_58_node); + + __visc__edge(var_57, var_58, 1, 0, 0, 0); + __visc__edge(var_57, var_58, 1, 1, 1, 0); + __visc__bindIn(var_58, 194, 2, 0); + __visc__bindIn(var_58, 195, 3, 0); + __visc__bindIn(var_58, 196, 4, 0); + __visc__bindIn(var_58, 197, 5, 0); + __visc__bindIn(var_58, 198, 6, 0); + __visc__bindIn(var_58, 199, 7, 0); + __visc__bindIn(var_58, 200, 8, 0); + __visc__bindIn(var_58, 201, 9, 0); + + void* var_59 = __visc__createNodeND(0, var_59_node); + + __visc__edge(var_58, var_59, 1, 0, 0, 0); + __visc__edge(var_58, var_59, 1, 1, 1, 0); + + void* var_60 = __visc__createNodeND(0, var_60_node); + + __visc__edge(var_59, var_60, 1, 0, 0, 0); + __visc__edge(var_59, var_60, 1, 1, 1, 0); + __visc__bindIn(var_60, 202, 2, 0); + __visc__bindIn(var_60, 203, 3, 0); + + void* var_61 = __visc__createNodeND(0, var_61_node); + + __visc__edge(var_60, var_61, 1, 0, 0, 0); + __visc__edge(var_60, var_61, 1, 1, 1, 0); + __visc__bindIn(var_61, 204, 2, 0); + __visc__bindIn(var_61, 205, 3, 0); + __visc__bindIn(var_61, 206, 4, 0); + __visc__bindIn(var_61, 207, 5, 0); + __visc__bindIn(var_61, 208, 6, 0); + __visc__bindIn(var_61, 209, 7, 0); + __visc__bindIn(var_61, 210, 8, 0); + __visc__bindIn(var_61, 211, 9, 0); + + void* var_62 = __visc__createNodeND(0, var_62_node); + + __visc__edge(var_61, var_62, 1, 0, 0, 0); + __visc__edge(var_61, var_62, 1, 1, 1, 0); + + void* var_63 = __visc__createNodeND(0, var_63_node); + + __visc__edge(var_62, var_63, 1, 0, 0, 0); + __visc__edge(var_62, var_63, 1, 1, 1, 0); + __visc__bindIn(var_63, 212, 2, 0); + __visc__bindIn(var_63, 213, 3, 0); + + void* var_64 = __visc__createNodeND(0, 
var_64_node); + + __visc__edge(var_63, var_64, 1, 0, 0, 0); + __visc__edge(var_63, var_64, 1, 1, 1, 0); + __visc__bindIn(var_64, 214, 2, 0); + __visc__bindIn(var_64, 215, 3, 0); + __visc__bindIn(var_64, 216, 4, 0); + __visc__bindIn(var_64, 217, 5, 0); + __visc__bindIn(var_64, 218, 6, 0); + __visc__bindIn(var_64, 219, 7, 0); + __visc__bindIn(var_64, 220, 8, 0); + __visc__bindIn(var_64, 221, 9, 0); + + void* var_65 = __visc__createNodeND(0, var_65_node); + + __visc__edge(var_64, var_65, 1, 0, 0, 0); + __visc__edge(var_64, var_65, 1, 1, 1, 0); + + void* var_66 = __visc__createNodeND(0, var_66_node); + + __visc__edge(var_65, var_66, 1, 0, 0, 0); + __visc__edge(var_65, var_66, 1, 1, 1, 0); + __visc__bindIn(var_66, 222, 2, 0); + __visc__bindIn(var_66, 223, 3, 0); + + void* var_67 = __visc__createNodeND(0, var_67_node); + + __visc__edge(var_66, var_67, 1, 0, 0, 0); + __visc__edge(var_66, var_67, 1, 1, 1, 0); + __visc__bindIn(var_67, 224, 2, 0); + __visc__bindIn(var_67, 225, 3, 0); + __visc__bindIn(var_67, 226, 4, 0); + __visc__bindIn(var_67, 227, 5, 0); + __visc__bindIn(var_67, 228, 6, 0); + __visc__bindIn(var_67, 229, 7, 0); + __visc__bindIn(var_67, 230, 8, 0); + __visc__bindIn(var_67, 231, 9, 0); + + void* var_68 = __visc__createNodeND(0, var_68_node); + + __visc__edge(var_67, var_68, 1, 0, 0, 0); + __visc__edge(var_67, var_68, 1, 1, 1, 0); + + void* var_69 = __visc__createNodeND(0, var_69_node); + + __visc__edge(var_68, var_69, 1, 0, 0, 0); + __visc__edge(var_68, var_69, 1, 1, 1, 0); + __visc__bindIn(var_69, 232, 2, 0); + __visc__bindIn(var_69, 233, 3, 0); + + void* var_70 = __visc__createNodeND(0, var_70_node); + + __visc__edge(var_69, var_70, 1, 0, 0, 0); + __visc__edge(var_69, var_70, 1, 1, 1, 0); + __visc__bindIn(var_70, 234, 2, 0); + __visc__bindIn(var_70, 235, 3, 0); + __visc__bindIn(var_70, 236, 4, 0); + __visc__bindIn(var_70, 237, 5, 0); + __visc__bindIn(var_70, 238, 6, 0); + __visc__bindIn(var_70, 239, 7, 0); + __visc__bindIn(var_70, 240, 8, 0); + __visc__bindIn(var_70, 241, 9, 0); + + void* var_71 = __visc__createNodeND(0, var_71_node); + + __visc__edge(var_70, var_71, 1, 0, 0, 0); + __visc__edge(var_70, var_71, 1, 1, 1, 0); + + void* var_72 = __visc__createNodeND(0, var_72_node); + + __visc__edge(var_71, var_72, 1, 0, 0, 0); + __visc__edge(var_71, var_72, 1, 1, 1, 0); + __visc__bindIn(var_72, 242, 2, 0); + __visc__bindIn(var_72, 243, 3, 0); + + void* var_73 = __visc__createNodeND(0, var_73_node); + + __visc__edge(var_72, var_73, 1, 0, 0, 0); + __visc__edge(var_72, var_73, 1, 1, 1, 0); + __visc__bindIn(var_73, 244, 2, 0); + __visc__bindIn(var_73, 245, 3, 0); + __visc__bindIn(var_73, 246, 4, 0); + __visc__bindIn(var_73, 247, 5, 0); + __visc__bindIn(var_73, 248, 6, 0); + __visc__bindIn(var_73, 249, 7, 0); + __visc__bindIn(var_73, 250, 8, 0); + __visc__bindIn(var_73, 251, 9, 0); + + void* var_74 = __visc__createNodeND(0, var_74_node); + + __visc__edge(var_73, var_74, 1, 0, 0, 0); + __visc__edge(var_73, var_74, 1, 1, 1, 0); + + void* var_75 = __visc__createNodeND(0, var_75_node); + + __visc__edge(var_74, var_75, 1, 0, 0, 0); + __visc__edge(var_74, var_75, 1, 1, 1, 0); + __visc__bindIn(var_75, 252, 2, 0); + __visc__bindIn(var_75, 253, 3, 0); + + void* var_76 = __visc__createNodeND(0, var_76_node); + + __visc__edge(var_75, var_76, 1, 0, 0, 0); + __visc__edge(var_75, var_76, 1, 1, 1, 0); + __visc__bindIn(var_76, 254, 2, 0); + __visc__bindIn(var_76, 255, 3, 0); + __visc__bindIn(var_76, 256, 4, 0); + __visc__bindIn(var_76, 257, 5, 0); + __visc__bindIn(var_76, 258, 6, 0); + 
__visc__bindIn(var_76, 259, 7, 0); + __visc__bindIn(var_76, 260, 8, 0); + __visc__bindIn(var_76, 261, 9, 0); + + void* var_77 = __visc__createNodeND(0, var_77_node); + + __visc__edge(var_76, var_77, 1, 0, 0, 0); + __visc__edge(var_76, var_77, 1, 1, 1, 0); + + void* var_78 = __visc__createNodeND(0, var_78_node); + + __visc__edge(var_77, var_78, 1, 0, 0, 0); + __visc__edge(var_77, var_78, 1, 1, 1, 0); + __visc__bindIn(var_78, 262, 2, 0); + __visc__bindIn(var_78, 263, 3, 0); + + void* var_79 = __visc__createNodeND(0, var_79_node); + + __visc__edge(var_78, var_79, 1, 0, 0, 0); + __visc__edge(var_78, var_79, 1, 1, 1, 0); + __visc__bindIn(var_79, 264, 2, 0); + __visc__bindIn(var_79, 265, 3, 0); + __visc__bindIn(var_79, 266, 4, 0); + __visc__bindIn(var_79, 267, 5, 0); + __visc__bindIn(var_79, 268, 6, 0); + __visc__bindIn(var_79, 269, 7, 0); + __visc__bindIn(var_79, 270, 8, 0); + __visc__bindIn(var_79, 271, 9, 0); + + void* var_80 = __visc__createNodeND(0, var_80_node); + + __visc__edge(var_79, var_80, 1, 0, 0, 0); + __visc__edge(var_79, var_80, 1, 1, 1, 0); + + void* var_81 = __visc__createNodeND(0, var_81_node); + + __visc__edge(var_80, var_81, 1, 0, 0, 0); + __visc__edge(var_80, var_81, 1, 1, 1, 0); + + void* var_82 = __visc__createNodeND(0, var_82_node); + + __visc__edge(var_81, var_82, 1, 0, 0, 0); + __visc__edge(var_81, var_82, 1, 1, 1, 0); + __visc__bindIn(var_82, 272, 2, 0); + __visc__bindIn(var_82, 273, 3, 0); + + void* var_83 = __visc__createNodeND(0, var_83_node); + + __visc__edge(var_82, var_83, 1, 0, 0, 0); + __visc__edge(var_82, var_83, 1, 1, 1, 0); + __visc__bindIn(var_83, 274, 2, 0); + __visc__bindIn(var_83, 275, 3, 0); + + void* var_84 = __visc__createNodeND(0, var_84_node); + + __visc__edge(var_83, var_84, 1, 0, 0, 0); + __visc__edge(var_83, var_84, 1, 1, 1, 0); + + __visc__bindOut(var_84, 0, 0, 0); + __visc__bindOut(var_84, 1, 1, 0); + +} + +struct ret_t { + void* tensor; + size_t bytes; +}; + +typedef struct __attribute__((__packed__)) { + void* input; + size_t input_bytes; + void* conv2d_1_w; + size_t conv2d_1_w_bytes; + void* batch_normalization_1_gamma; + size_t batch_normalization_1_gamma_bytes; + void* batch_normalization_1_beta; + size_t batch_normalization_1_beta_bytes; + void* batch_normalization_1_mean; + size_t batch_normalization_1_mean_bytes; + void* batch_normalization_1_variance; + size_t batch_normalization_1_variance_bytes; + void* depthwise_conv2d_1_w; + size_t depthwise_conv2d_1_w_bytes; + void* batch_normalization_2_gamma; + size_t batch_normalization_2_gamma_bytes; + void* batch_normalization_2_beta; + size_t batch_normalization_2_beta_bytes; + void* batch_normalization_2_mean; + size_t batch_normalization_2_mean_bytes; + void* batch_normalization_2_variance; + size_t batch_normalization_2_variance_bytes; + void* conv2d_2_w; + size_t conv2d_2_w_bytes; + void* batch_normalization_3_gamma; + size_t batch_normalization_3_gamma_bytes; + void* batch_normalization_3_beta; + size_t batch_normalization_3_beta_bytes; + void* batch_normalization_3_mean; + size_t batch_normalization_3_mean_bytes; + void* batch_normalization_3_variance; + size_t batch_normalization_3_variance_bytes; + void* depthwise_conv2d_2_w; + size_t depthwise_conv2d_2_w_bytes; + void* batch_normalization_4_gamma; + size_t batch_normalization_4_gamma_bytes; + void* batch_normalization_4_beta; + size_t batch_normalization_4_beta_bytes; + void* batch_normalization_4_mean; + size_t batch_normalization_4_mean_bytes; + void* batch_normalization_4_variance; + size_t batch_normalization_4_variance_bytes; + 
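+ // This packed struct lists every root-level input as a {void* tensor,
+ // size_t bytes} pair, mirroring the root function's parameter order; the
+ // __attribute__((__packed__)) keeps the byte layout identical to that
+ // argument order when the struct is passed at graph launch.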
void* conv2d_3_w; + size_t conv2d_3_w_bytes; + void* batch_normalization_5_gamma; + size_t batch_normalization_5_gamma_bytes; + void* batch_normalization_5_beta; + size_t batch_normalization_5_beta_bytes; + void* batch_normalization_5_mean; + size_t batch_normalization_5_mean_bytes; + void* batch_normalization_5_variance; + size_t batch_normalization_5_variance_bytes; + void* depthwise_conv2d_3_w; + size_t depthwise_conv2d_3_w_bytes; + void* batch_normalization_6_gamma; + size_t batch_normalization_6_gamma_bytes; + void* batch_normalization_6_beta; + size_t batch_normalization_6_beta_bytes; + void* batch_normalization_6_mean; + size_t batch_normalization_6_mean_bytes; + void* batch_normalization_6_variance; + size_t batch_normalization_6_variance_bytes; + void* conv2d_4_w; + size_t conv2d_4_w_bytes; + void* batch_normalization_7_gamma; + size_t batch_normalization_7_gamma_bytes; + void* batch_normalization_7_beta; + size_t batch_normalization_7_beta_bytes; + void* batch_normalization_7_mean; + size_t batch_normalization_7_mean_bytes; + void* batch_normalization_7_variance; + size_t batch_normalization_7_variance_bytes; + void* depthwise_conv2d_4_w; + size_t depthwise_conv2d_4_w_bytes; + void* batch_normalization_8_gamma; + size_t batch_normalization_8_gamma_bytes; + void* batch_normalization_8_beta; + size_t batch_normalization_8_beta_bytes; + void* batch_normalization_8_mean; + size_t batch_normalization_8_mean_bytes; + void* batch_normalization_8_variance; + size_t batch_normalization_8_variance_bytes; + void* conv2d_5_w; + size_t conv2d_5_w_bytes; + void* batch_normalization_9_gamma; + size_t batch_normalization_9_gamma_bytes; + void* batch_normalization_9_beta; + size_t batch_normalization_9_beta_bytes; + void* batch_normalization_9_mean; + size_t batch_normalization_9_mean_bytes; + void* batch_normalization_9_variance; + size_t batch_normalization_9_variance_bytes; + void* depthwise_conv2d_5_w; + size_t depthwise_conv2d_5_w_bytes; + void* batch_normalization_10_gamma; + size_t batch_normalization_10_gamma_bytes; + void* batch_normalization_10_beta; + size_t batch_normalization_10_beta_bytes; + void* batch_normalization_10_mean; + size_t batch_normalization_10_mean_bytes; + void* batch_normalization_10_variance; + size_t batch_normalization_10_variance_bytes; + void* conv2d_6_w; + size_t conv2d_6_w_bytes; + void* batch_normalization_11_gamma; + size_t batch_normalization_11_gamma_bytes; + void* batch_normalization_11_beta; + size_t batch_normalization_11_beta_bytes; + void* batch_normalization_11_mean; + size_t batch_normalization_11_mean_bytes; + void* batch_normalization_11_variance; + size_t batch_normalization_11_variance_bytes; + void* depthwise_conv2d_6_w; + size_t depthwise_conv2d_6_w_bytes; + void* batch_normalization_12_gamma; + size_t batch_normalization_12_gamma_bytes; + void* batch_normalization_12_beta; + size_t batch_normalization_12_beta_bytes; + void* batch_normalization_12_mean; + size_t batch_normalization_12_mean_bytes; + void* batch_normalization_12_variance; + size_t batch_normalization_12_variance_bytes; + void* conv2d_7_w; + size_t conv2d_7_w_bytes; + void* batch_normalization_13_gamma; + size_t batch_normalization_13_gamma_bytes; + void* batch_normalization_13_beta; + size_t batch_normalization_13_beta_bytes; + void* batch_normalization_13_mean; + size_t batch_normalization_13_mean_bytes; + void* batch_normalization_13_variance; + size_t batch_normalization_13_variance_bytes; + void* depthwise_conv2d_7_w; + size_t depthwise_conv2d_7_w_bytes; + void* 
batch_normalization_14_gamma; + size_t batch_normalization_14_gamma_bytes; + void* batch_normalization_14_beta; + size_t batch_normalization_14_beta_bytes; + void* batch_normalization_14_mean; + size_t batch_normalization_14_mean_bytes; + void* batch_normalization_14_variance; + size_t batch_normalization_14_variance_bytes; + void* conv2d_8_w; + size_t conv2d_8_w_bytes; + void* batch_normalization_15_gamma; + size_t batch_normalization_15_gamma_bytes; + void* batch_normalization_15_beta; + size_t batch_normalization_15_beta_bytes; + void* batch_normalization_15_mean; + size_t batch_normalization_15_mean_bytes; + void* batch_normalization_15_variance; + size_t batch_normalization_15_variance_bytes; + void* depthwise_conv2d_8_w; + size_t depthwise_conv2d_8_w_bytes; + void* batch_normalization_16_gamma; + size_t batch_normalization_16_gamma_bytes; + void* batch_normalization_16_beta; + size_t batch_normalization_16_beta_bytes; + void* batch_normalization_16_mean; + size_t batch_normalization_16_mean_bytes; + void* batch_normalization_16_variance; + size_t batch_normalization_16_variance_bytes; + void* conv2d_9_w; + size_t conv2d_9_w_bytes; + void* batch_normalization_17_gamma; + size_t batch_normalization_17_gamma_bytes; + void* batch_normalization_17_beta; + size_t batch_normalization_17_beta_bytes; + void* batch_normalization_17_mean; + size_t batch_normalization_17_mean_bytes; + void* batch_normalization_17_variance; + size_t batch_normalization_17_variance_bytes; + void* depthwise_conv2d_9_w; + size_t depthwise_conv2d_9_w_bytes; + void* batch_normalization_18_gamma; + size_t batch_normalization_18_gamma_bytes; + void* batch_normalization_18_beta; + size_t batch_normalization_18_beta_bytes; + void* batch_normalization_18_mean; + size_t batch_normalization_18_mean_bytes; + void* batch_normalization_18_variance; + size_t batch_normalization_18_variance_bytes; + void* conv2d_10_w; + size_t conv2d_10_w_bytes; + void* batch_normalization_19_gamma; + size_t batch_normalization_19_gamma_bytes; + void* batch_normalization_19_beta; + size_t batch_normalization_19_beta_bytes; + void* batch_normalization_19_mean; + size_t batch_normalization_19_mean_bytes; + void* batch_normalization_19_variance; + size_t batch_normalization_19_variance_bytes; + void* depthwise_conv2d_10_w; + size_t depthwise_conv2d_10_w_bytes; + void* batch_normalization_20_gamma; + size_t batch_normalization_20_gamma_bytes; + void* batch_normalization_20_beta; + size_t batch_normalization_20_beta_bytes; + void* batch_normalization_20_mean; + size_t batch_normalization_20_mean_bytes; + void* batch_normalization_20_variance; + size_t batch_normalization_20_variance_bytes; + void* conv2d_11_w; + size_t conv2d_11_w_bytes; + void* batch_normalization_21_gamma; + size_t batch_normalization_21_gamma_bytes; + void* batch_normalization_21_beta; + size_t batch_normalization_21_beta_bytes; + void* batch_normalization_21_mean; + size_t batch_normalization_21_mean_bytes; + void* batch_normalization_21_variance; + size_t batch_normalization_21_variance_bytes; + void* depthwise_conv2d_11_w; + size_t depthwise_conv2d_11_w_bytes; + void* batch_normalization_22_gamma; + size_t batch_normalization_22_gamma_bytes; + void* batch_normalization_22_beta; + size_t batch_normalization_22_beta_bytes; + void* batch_normalization_22_mean; + size_t batch_normalization_22_mean_bytes; + void* batch_normalization_22_variance; + size_t batch_normalization_22_variance_bytes; + void* conv2d_12_w; + size_t conv2d_12_w_bytes; + void* batch_normalization_23_gamma; + 
size_t batch_normalization_23_gamma_bytes; + void* batch_normalization_23_beta; + size_t batch_normalization_23_beta_bytes; + void* batch_normalization_23_mean; + size_t batch_normalization_23_mean_bytes; + void* batch_normalization_23_variance; + size_t batch_normalization_23_variance_bytes; + void* depthwise_conv2d_12_w; + size_t depthwise_conv2d_12_w_bytes; + void* batch_normalization_24_gamma; + size_t batch_normalization_24_gamma_bytes; + void* batch_normalization_24_beta; + size_t batch_normalization_24_beta_bytes; + void* batch_normalization_24_mean; + size_t batch_normalization_24_mean_bytes; + void* batch_normalization_24_variance; + size_t batch_normalization_24_variance_bytes; + void* conv2d_13_w; + size_t conv2d_13_w_bytes; + void* batch_normalization_25_gamma; + size_t batch_normalization_25_gamma_bytes; + void* batch_normalization_25_beta; + size_t batch_normalization_25_beta_bytes; + void* batch_normalization_25_mean; + size_t batch_normalization_25_mean_bytes; + void* batch_normalization_25_variance; + size_t batch_normalization_25_variance_bytes; + void* depthwise_conv2d_13_w; + size_t depthwise_conv2d_13_w_bytes; + void* batch_normalization_26_gamma; + size_t batch_normalization_26_gamma_bytes; + void* batch_normalization_26_beta; + size_t batch_normalization_26_beta_bytes; + void* batch_normalization_26_mean; + size_t batch_normalization_26_mean_bytes; + void* batch_normalization_26_variance; + size_t batch_normalization_26_variance_bytes; + void* conv2d_14_w; + size_t conv2d_14_w_bytes; + void* batch_normalization_27_gamma; + size_t batch_normalization_27_gamma_bytes; + void* batch_normalization_27_beta; + size_t batch_normalization_27_beta_bytes; + void* batch_normalization_27_mean; + size_t batch_normalization_27_mean_bytes; + void* batch_normalization_27_variance; + size_t batch_normalization_27_variance_bytes; + void* dense_1_w; + size_t dense_1_w_bytes; + void* dense_1_b; + size_t dense_1_b_bytes; + + struct ret_t r; +} +RootIn; + +int main(){ + + //std::string dir_prefix = std::string("../../../../../projects/hpvm-tensor-rt/model_params/mobilenet_quant/"); + + std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/mobilenet/"); + + std::string input_path = dir_prefix + std::string("input.bin"); + std::string labels_path = dir_prefix + std::string("labels32.bin"); + std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); + void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); + std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); + void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); + void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); + void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); + void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); + std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); + void* depthwise_conv2d_1_w = 
readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); + std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); + void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); + void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); + void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); + std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); + void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); + std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); + void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); + std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); + void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); + void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); + void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); + void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); + std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); + void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); + std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); + void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); + void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); + void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); + std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); + void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); + std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); + void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); + std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); + void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_5_beta_path = dir_prefix + 
std::string("batch_normalization_5_beta.bin"); + void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); + void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); + void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); + std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); + void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); + std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); + void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); + void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); + void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); + void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); + std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); + void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); + std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); + void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); + void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); + void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); + void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); + std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); + void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); + std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); + void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); + void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); + void* batch_normalization_8_mean = 
readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); + std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); + void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); + std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); + void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); + std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); + void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); + void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); + void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); + void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); + std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); + void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); + std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); + void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); + void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); + void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); + void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); + std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); + void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); + std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); + void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); + void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); + void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); + void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); + std::string 
depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); + void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); + std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); + void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); + void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); + void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); + std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); + void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); + std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); + void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); + std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); + void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); + void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); + void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); + void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); + void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); + void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); + void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); + void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); + void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); + std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); + void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); + std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); + void* 
batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); + void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); + void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); + void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); + void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); + void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); + void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); + void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); + void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); + std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); + void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); + std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); + void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); + void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); + void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); + void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); + void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); + void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); + void* batch_normalization_18_beta = 
readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); + void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); + void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); + std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); + void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); + std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); + void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); + void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); + void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); + void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); + void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); + void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); + void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); + void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); + void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); + std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); + void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); + std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); + void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); + void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); + void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
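+ // readTrainedWeights(path, 0, d1, d2, d3, d4), used throughout this block,
+ // loads a binary weight file into a runtime tensor whose shape is given by
+ // the four trailing dimensions (N,C,H,W order here: e.g. 0,1,512,1,1 is a
+ // per-channel batch-norm vector over 512 channels, and 0,512,1,3,3 a bank
+ // of 512 single-channel 3x3 depthwise filters).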
+ std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); + void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); + void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); + void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); + void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); + void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); + void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); + std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); + void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); + std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); + void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); + void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); + void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); + void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); + std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); + void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); + std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); + void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); + void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); + void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); + std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); + void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); + std::string 
conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); + void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); + std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); + void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); + void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); + void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); + void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); + std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); + void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); + std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); + void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); + void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); + void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); + void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); + std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); + void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); + std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); + void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); + void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); + void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); + std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); + void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); + std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); + void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); + std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); + void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 
+  //void* input = readTrainedWeights(input_path.c_str(), 0, 5000,3,32,32);
+  //uint8_t* labels = readLabels(labels_path.c_str(), 5000);
+
+  __visc__init();
+  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
+
+  //args->input = input;
+  //args->input_bytes = 0;
+  args->conv2d_1_w = conv2d_1_w;
+  args->conv2d_1_w_bytes = 0;
+  args->batch_normalization_1_gamma = batch_normalization_1_gamma;
+  args->batch_normalization_1_gamma_bytes = 0;
+  args->batch_normalization_1_beta = batch_normalization_1_beta;
+  args->batch_normalization_1_beta_bytes = 0;
+  args->batch_normalization_1_mean = batch_normalization_1_mean;
+  args->batch_normalization_1_mean_bytes = 0;
+  args->batch_normalization_1_variance = batch_normalization_1_variance;
+  args->batch_normalization_1_variance_bytes = 0;
+  args->depthwise_conv2d_1_w = depthwise_conv2d_1_w;
+  args->depthwise_conv2d_1_w_bytes = 0;
+  args->batch_normalization_2_gamma = batch_normalization_2_gamma;
+  args->batch_normalization_2_gamma_bytes = 0;
+  args->batch_normalization_2_beta = batch_normalization_2_beta;
+  args->batch_normalization_2_beta_bytes = 0;
+  args->batch_normalization_2_mean = batch_normalization_2_mean;
+  args->batch_normalization_2_mean_bytes = 0;
+  args->batch_normalization_2_variance = batch_normalization_2_variance;
+  args->batch_normalization_2_variance_bytes = 0;
+  args->conv2d_2_w = conv2d_2_w;
+  args->conv2d_2_w_bytes = 0;
+  args->batch_normalization_3_gamma = batch_normalization_3_gamma;
+  args->batch_normalization_3_gamma_bytes = 0;
+  args->batch_normalization_3_beta = batch_normalization_3_beta;
+  args->batch_normalization_3_beta_bytes = 0;
+  args->batch_normalization_3_mean = batch_normalization_3_mean;
+  args->batch_normalization_3_mean_bytes = 0;
+  args->batch_normalization_3_variance = batch_normalization_3_variance;
+  args->batch_normalization_3_variance_bytes = 0;
+  args->depthwise_conv2d_2_w = depthwise_conv2d_2_w;
+  args->depthwise_conv2d_2_w_bytes = 0;
+  args->batch_normalization_4_gamma = batch_normalization_4_gamma;
+  args->batch_normalization_4_gamma_bytes = 0;
+  args->batch_normalization_4_beta = batch_normalization_4_beta;
+  args->batch_normalization_4_beta_bytes = 0;
+  args->batch_normalization_4_mean = batch_normalization_4_mean;
+  args->batch_normalization_4_mean_bytes = 0;
+  args->batch_normalization_4_variance = batch_normalization_4_variance;
+  args->batch_normalization_4_variance_bytes = 0;
+  args->conv2d_3_w = conv2d_3_w;
+  args->conv2d_3_w_bytes = 0;
+  args->batch_normalization_5_gamma = batch_normalization_5_gamma;
+  args->batch_normalization_5_gamma_bytes = 0;
+  args->batch_normalization_5_beta = batch_normalization_5_beta;
+  args->batch_normalization_5_beta_bytes = 0;
+  args->batch_normalization_5_mean = batch_normalization_5_mean;
+  args->batch_normalization_5_mean_bytes = 0;
+  args->batch_normalization_5_variance = batch_normalization_5_variance;
+  args->batch_normalization_5_variance_bytes = 0;
+  args->depthwise_conv2d_3_w = depthwise_conv2d_3_w;
+  args->depthwise_conv2d_3_w_bytes = 0;
+  args->batch_normalization_6_gamma = batch_normalization_6_gamma;
+  args->batch_normalization_6_gamma_bytes = 0;
+  args->batch_normalization_6_beta = batch_normalization_6_beta;
+  args->batch_normalization_6_beta_bytes = 0;
+  args->batch_normalization_6_mean = batch_normalization_6_mean;
+  args->batch_normalization_6_mean_bytes = 0;
+  args->batch_normalization_6_variance = batch_normalization_6_variance;
+  args->batch_normalization_6_variance_bytes = 0;
+  args->conv2d_4_w = conv2d_4_w;
+  args->conv2d_4_w_bytes = 0;
+  args->batch_normalization_7_gamma = batch_normalization_7_gamma;
+  args->batch_normalization_7_gamma_bytes = 0;
+  args->batch_normalization_7_beta = batch_normalization_7_beta;
+  args->batch_normalization_7_beta_bytes = 0;
+  args->batch_normalization_7_mean = batch_normalization_7_mean;
+  args->batch_normalization_7_mean_bytes = 0;
+  args->batch_normalization_7_variance = batch_normalization_7_variance;
+  args->batch_normalization_7_variance_bytes = 0;
+  args->depthwise_conv2d_4_w = depthwise_conv2d_4_w;
+  args->depthwise_conv2d_4_w_bytes = 0;
+  args->batch_normalization_8_gamma = batch_normalization_8_gamma;
+  args->batch_normalization_8_gamma_bytes = 0;
+  args->batch_normalization_8_beta = batch_normalization_8_beta;
+  args->batch_normalization_8_beta_bytes = 0;
+  args->batch_normalization_8_mean = batch_normalization_8_mean;
+  args->batch_normalization_8_mean_bytes = 0;
+  args->batch_normalization_8_variance = batch_normalization_8_variance;
+  args->batch_normalization_8_variance_bytes = 0;
+  args->conv2d_5_w = conv2d_5_w;
+  args->conv2d_5_w_bytes = 0;
+  args->batch_normalization_9_gamma = batch_normalization_9_gamma;
+  args->batch_normalization_9_gamma_bytes = 0;
+  args->batch_normalization_9_beta = batch_normalization_9_beta;
+  args->batch_normalization_9_beta_bytes = 0;
+  args->batch_normalization_9_mean = batch_normalization_9_mean;
+  args->batch_normalization_9_mean_bytes = 0;
+  args->batch_normalization_9_variance = batch_normalization_9_variance;
+  args->batch_normalization_9_variance_bytes = 0;
+  args->depthwise_conv2d_5_w = depthwise_conv2d_5_w;
+  args->depthwise_conv2d_5_w_bytes = 0;
+  args->batch_normalization_10_gamma = batch_normalization_10_gamma;
+  args->batch_normalization_10_gamma_bytes = 0;
+  args->batch_normalization_10_beta = batch_normalization_10_beta;
+  args->batch_normalization_10_beta_bytes = 0;
+  args->batch_normalization_10_mean = batch_normalization_10_mean;
+  args->batch_normalization_10_mean_bytes = 0;
+  args->batch_normalization_10_variance = batch_normalization_10_variance;
+  args->batch_normalization_10_variance_bytes = 0;
+  args->conv2d_6_w = conv2d_6_w;
+  args->conv2d_6_w_bytes = 0;
+  args->batch_normalization_11_gamma = batch_normalization_11_gamma;
+  args->batch_normalization_11_gamma_bytes = 0;
+  args->batch_normalization_11_beta = batch_normalization_11_beta;
+  args->batch_normalization_11_beta_bytes = 0;
+  args->batch_normalization_11_mean = batch_normalization_11_mean;
+  args->batch_normalization_11_mean_bytes = 0;
+  args->batch_normalization_11_variance = batch_normalization_11_variance;
+  args->batch_normalization_11_variance_bytes = 0;
+  args->depthwise_conv2d_6_w = depthwise_conv2d_6_w;
+  args->depthwise_conv2d_6_w_bytes = 0;
+  args->batch_normalization_12_gamma = batch_normalization_12_gamma;
+  args->batch_normalization_12_gamma_bytes = 0;
+  args->batch_normalization_12_beta = batch_normalization_12_beta;
+  args->batch_normalization_12_beta_bytes = 0;
+  args->batch_normalization_12_mean = batch_normalization_12_mean;
+  args->batch_normalization_12_mean_bytes = 0;
+  args->batch_normalization_12_variance = batch_normalization_12_variance;
+  args->batch_normalization_12_variance_bytes = 0;
+  args->conv2d_7_w = conv2d_7_w;
+  args->conv2d_7_w_bytes = 0;
+  args->batch_normalization_13_gamma = batch_normalization_13_gamma;
+  args->batch_normalization_13_gamma_bytes = 0;
+  args->batch_normalization_13_beta = batch_normalization_13_beta;
+  args->batch_normalization_13_beta_bytes = 0;
+  args->batch_normalization_13_mean = batch_normalization_13_mean;
+  args->batch_normalization_13_mean_bytes = 0;
+  args->batch_normalization_13_variance = batch_normalization_13_variance;
+  args->batch_normalization_13_variance_bytes = 0;
+  args->depthwise_conv2d_7_w = depthwise_conv2d_7_w;
+  args->depthwise_conv2d_7_w_bytes = 0;
+  args->batch_normalization_14_gamma = batch_normalization_14_gamma;
+  args->batch_normalization_14_gamma_bytes = 0;
+  args->batch_normalization_14_beta = batch_normalization_14_beta;
+  args->batch_normalization_14_beta_bytes = 0;
+  args->batch_normalization_14_mean = batch_normalization_14_mean;
+  args->batch_normalization_14_mean_bytes = 0;
+  args->batch_normalization_14_variance = batch_normalization_14_variance;
+  args->batch_normalization_14_variance_bytes = 0;
+  args->conv2d_8_w = conv2d_8_w;
+  args->conv2d_8_w_bytes = 0;
+  args->batch_normalization_15_gamma = batch_normalization_15_gamma;
+  args->batch_normalization_15_gamma_bytes = 0;
+  args->batch_normalization_15_beta = batch_normalization_15_beta;
+  args->batch_normalization_15_beta_bytes = 0;
+  args->batch_normalization_15_mean = batch_normalization_15_mean;
+  args->batch_normalization_15_mean_bytes = 0;
+  args->batch_normalization_15_variance = batch_normalization_15_variance;
+  args->batch_normalization_15_variance_bytes = 0;
+  args->depthwise_conv2d_8_w = depthwise_conv2d_8_w;
+  args->depthwise_conv2d_8_w_bytes = 0;
+  args->batch_normalization_16_gamma = batch_normalization_16_gamma;
+  args->batch_normalization_16_gamma_bytes = 0;
+  args->batch_normalization_16_beta = batch_normalization_16_beta;
+  args->batch_normalization_16_beta_bytes = 0;
+  args->batch_normalization_16_mean = batch_normalization_16_mean;
+  args->batch_normalization_16_mean_bytes = 0;
+  args->batch_normalization_16_variance = batch_normalization_16_variance;
+  args->batch_normalization_16_variance_bytes = 0;
+  args->conv2d_9_w = conv2d_9_w;
+  args->conv2d_9_w_bytes = 0;
+  args->batch_normalization_17_gamma = batch_normalization_17_gamma;
+  args->batch_normalization_17_gamma_bytes = 0;
+  args->batch_normalization_17_beta = batch_normalization_17_beta;
+  args->batch_normalization_17_beta_bytes = 0;
+  args->batch_normalization_17_mean = batch_normalization_17_mean;
+  args->batch_normalization_17_mean_bytes = 0;
+  args->batch_normalization_17_variance = batch_normalization_17_variance;
+  args->batch_normalization_17_variance_bytes = 0;
+  args->depthwise_conv2d_9_w = depthwise_conv2d_9_w;
+  args->depthwise_conv2d_9_w_bytes = 0;
+  args->batch_normalization_18_gamma = batch_normalization_18_gamma;
+  args->batch_normalization_18_gamma_bytes = 0;
+  args->batch_normalization_18_beta = batch_normalization_18_beta;
+  args->batch_normalization_18_beta_bytes = 0;
+  args->batch_normalization_18_mean = batch_normalization_18_mean;
+  args->batch_normalization_18_mean_bytes = 0;
+  args->batch_normalization_18_variance = batch_normalization_18_variance;
+  args->batch_normalization_18_variance_bytes = 0;
+  args->conv2d_10_w = conv2d_10_w;
+  args->conv2d_10_w_bytes = 0;
+  args->batch_normalization_19_gamma = batch_normalization_19_gamma;
+  args->batch_normalization_19_gamma_bytes = 0;
+  args->batch_normalization_19_beta = batch_normalization_19_beta;
+  args->batch_normalization_19_beta_bytes = 0;
+  args->batch_normalization_19_mean = batch_normalization_19_mean;
+  args->batch_normalization_19_mean_bytes = 0;
+  args->batch_normalization_19_variance = batch_normalization_19_variance;
+  args->batch_normalization_19_variance_bytes = 0;
+  args->depthwise_conv2d_10_w = depthwise_conv2d_10_w;
+  args->depthwise_conv2d_10_w_bytes = 0;
+  args->batch_normalization_20_gamma = batch_normalization_20_gamma;
+  args->batch_normalization_20_gamma_bytes = 0;
+  args->batch_normalization_20_beta = batch_normalization_20_beta;
+  args->batch_normalization_20_beta_bytes = 0;
+  args->batch_normalization_20_mean = batch_normalization_20_mean;
+  args->batch_normalization_20_mean_bytes = 0;
+  args->batch_normalization_20_variance = batch_normalization_20_variance;
+  args->batch_normalization_20_variance_bytes = 0;
+  args->conv2d_11_w = conv2d_11_w;
+  args->conv2d_11_w_bytes = 0;
+  args->batch_normalization_21_gamma = batch_normalization_21_gamma;
+  args->batch_normalization_21_gamma_bytes = 0;
+  args->batch_normalization_21_beta = batch_normalization_21_beta;
+  args->batch_normalization_21_beta_bytes = 0;
+  args->batch_normalization_21_mean = batch_normalization_21_mean;
+  args->batch_normalization_21_mean_bytes = 0;
+  args->batch_normalization_21_variance = batch_normalization_21_variance;
+  args->batch_normalization_21_variance_bytes = 0;
+  args->depthwise_conv2d_11_w = depthwise_conv2d_11_w;
+  args->depthwise_conv2d_11_w_bytes = 0;
+  args->batch_normalization_22_gamma = batch_normalization_22_gamma;
+  args->batch_normalization_22_gamma_bytes = 0;
+  args->batch_normalization_22_beta = batch_normalization_22_beta;
+  args->batch_normalization_22_beta_bytes = 0;
+  args->batch_normalization_22_mean = batch_normalization_22_mean;
+  args->batch_normalization_22_mean_bytes = 0;
+  args->batch_normalization_22_variance = batch_normalization_22_variance;
+  args->batch_normalization_22_variance_bytes = 0;
+  args->conv2d_12_w = conv2d_12_w;
+  args->conv2d_12_w_bytes = 0;
+  args->batch_normalization_23_gamma = batch_normalization_23_gamma;
+  args->batch_normalization_23_gamma_bytes = 0;
+  args->batch_normalization_23_beta = batch_normalization_23_beta;
+  args->batch_normalization_23_beta_bytes = 0;
+  args->batch_normalization_23_mean = batch_normalization_23_mean;
+  args->batch_normalization_23_mean_bytes = 0;
+  args->batch_normalization_23_variance = batch_normalization_23_variance;
+  args->batch_normalization_23_variance_bytes = 0;
+  args->depthwise_conv2d_12_w = depthwise_conv2d_12_w;
+  args->depthwise_conv2d_12_w_bytes = 0;
+  args->batch_normalization_24_gamma = batch_normalization_24_gamma;
+  args->batch_normalization_24_gamma_bytes = 0;
+  args->batch_normalization_24_beta = batch_normalization_24_beta;
+  args->batch_normalization_24_beta_bytes = 0;
+  args->batch_normalization_24_mean = batch_normalization_24_mean;
+  args->batch_normalization_24_mean_bytes = 0;
+  args->batch_normalization_24_variance = batch_normalization_24_variance;
+  args->batch_normalization_24_variance_bytes = 0;
+  args->conv2d_13_w = conv2d_13_w;
+  args->conv2d_13_w_bytes = 0;
+  args->batch_normalization_25_gamma = batch_normalization_25_gamma;
+  args->batch_normalization_25_gamma_bytes = 0;
+  args->batch_normalization_25_beta = batch_normalization_25_beta;
+  args->batch_normalization_25_beta_bytes = 0;
+  args->batch_normalization_25_mean = batch_normalization_25_mean;
+  args->batch_normalization_25_mean_bytes = 0;
+  args->batch_normalization_25_variance = batch_normalization_25_variance;
+  args->batch_normalization_25_variance_bytes = 0;
+  args->depthwise_conv2d_13_w = depthwise_conv2d_13_w;
+  args->depthwise_conv2d_13_w_bytes = 0;
+  args->batch_normalization_26_gamma = batch_normalization_26_gamma;
+  args->batch_normalization_26_gamma_bytes = 0;
+  args->batch_normalization_26_beta = batch_normalization_26_beta;
+  args->batch_normalization_26_beta_bytes = 0;
+  args->batch_normalization_26_mean = batch_normalization_26_mean;
+  args->batch_normalization_26_mean_bytes = 0;
+  args->batch_normalization_26_variance = batch_normalization_26_variance;
+  args->batch_normalization_26_variance_bytes = 0;
+  args->conv2d_14_w = conv2d_14_w;
+  args->conv2d_14_w_bytes = 0;
+  args->batch_normalization_27_gamma = batch_normalization_27_gamma;
+  args->batch_normalization_27_gamma_bytes = 0;
+  args->batch_normalization_27_beta = batch_normalization_27_beta;
+  args->batch_normalization_27_beta_bytes = 0;
+  args->batch_normalization_27_mean = batch_normalization_27_mean;
+  args->batch_normalization_27_mean_bytes = 0;
+  args->batch_normalization_27_variance = batch_normalization_27_variance;
+  args->batch_normalization_27_variance_bytes = 0;
+  args->dense_1_w = dense_1_w;
+  args->dense_1_w_bytes = 0;
+  args->dense_1_b = dense_1_b;
+  args->dense_1_b_bytes = 0;
+
+  int batch_size = 500;
+  int test_input_size = 5000;
+  int batch_count = test_input_size / batch_size;
+
+  // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
+
+  startMemTracking();
+  startProfiling();
+
+  for (int i = 0; i < batch_count; i++){
+
+    int start = i * batch_size;
+    int end = (i + 1) * batch_size;
+
+    // copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
+    void* input = readInputBatch(input_path.c_str(), 0,
+                                 start, end,
+                                 3, 32, 32);
+
+    args->input = input;
+    args->input_bytes = 0;
+
+    void* dfg = __visc__launch(0, root, (void*) args);
+
+    __visc__wait(dfg);
+
+    void *result = static_cast<RootIn*>(args)->input;
+    hpvm_request_tensor(result, 0);
+
+    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+    freeBatchMemory();
+  }
+  stopProfiling();
+  __visc__cleanup();
+
+  return 0;
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18_loop.cpp
index 1f81dba0c7ca6d5334565b34d7081b6155fa0569..4c91f0e6619558d30f3819a6161fb73e5b6aaf7f 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18_loop.cpp
@@ -1427,7 +1427,7 @@ int main(){
   args->dense_1_b_bytes = 0;
 
   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;
 
   // NOTE-HASHIM: commented out
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10_loop.cpp
index a0442904255ba6c6a6399205775693be1952df09..793139b9a868b2d7a80b1c7352d83056853a8175 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10_loop.cpp
@@ -971,7 +971,7 @@ int main(){
 
 
   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;
 
   // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/src/vgg16_cifar100_loop.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/src/vgg16_cifar100_loop.cpp
index b8126be3142749280791687b634dc6e697f07b77..7bb78e07bb8d6bda27b961753c9ccf3771151863 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/src/vgg16_cifar100_loop.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/src/vgg16_cifar100_loop.cpp
@@ -973,7 +973,7 @@ int main(){
 
 
   int batch_size = 500;
-  int test_input_size = 10000;
+  int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;
 
   startMemTracking();
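
Note on the three driver hunks above: with batch_size fixed at 500, halving test_input_size from 10000 to 5000 drops batch_count from 20 to 10, so each benchmark now streams half the test set through the dataflow graph. Every *_loop.cpp driver shares the same batched-inference skeleton; below is a condensed sketch of that pattern, using only calls that appear in the sources in this patch. It is an illustration, not a drop-in replacement for any one file (declarations of args, root, input_path, and labels_path are assumed to exist as in the full drivers).

  // Shared batched-inference pattern of the *_loop.cpp drivers.
  int batch_size = 500;
  int test_input_size = 5000;                      // was 10000: now 10 batches instead of 20
  int batch_count = test_input_size / batch_size;  // integer division drops any remainder

  startMemTracking();
  startProfiling();
  for (int i = 0; i < batch_count; i++) {
    int start = i * batch_size;                    // first image of this batch
    int end = (i + 1) * batch_size;                // one past the last image
    // Read only this batch (NCHW, 3x32x32) rather than the whole test set.
    void* input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
    args->input = input;
    args->input_bytes = 0;
    void* dfg = __visc__launch(0, root, (void*) args);  // run the dataflow graph on this batch
    __visc__wait(dfg);
    void* result = static_cast<RootIn*>(args)->input;   // output is written back into args
    hpvm_request_tensor(result, 0);                     // bring the result tensor to the host
    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
    freeBatchMemory();                                  // release this batch's tensors
  }
  stopProfiling();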