Merge branch 'approx_hpvm' of https://gitlab.engr.illinois.edu/llvm/hpvm into approx_hpvm

eebf5076 · hsharif3 · 941f8321 · 9286101b · eebf5076 · eebf5076
Commit eebf5076 authored 5 years ago by hsharif3
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Build hpvm
 ```shell
 mkdir install
 mkdir build && cd build
-cmake ../llvm -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" -DCMAKE_INSTALL_PREFIX=../install
+cmake ../llvm -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX=../install
 make -j<number of threads you want to use to build hpvm>
 ```

--- a/llvm/projects/hpvm-tensor-rt/bin/times.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/times.py
+class Config:
+  """Per-benchmark inputs read by computeTimes: runtime, fed_runs, full_runs."""
+  def __init__(self):
+    # Every field starts at zero; the script assigns real values after construction.
+    self.runtime = self.fed_runs = self.full_runs = 0
+def computeTimes(bench):
+  """Print the estimated "fed" and "full" total times, in hours, for one benchmark.
+
+  bench: object exposing numeric `runtime`, `fed_runs` and `full_runs`
+  attributes (for example a Config). Presumably `runtime` is seconds per
+  run, since totals are divided by 3600 to obtain hours -- TODO confirm.
+
+  Returns None; the two estimates are written to stdout.
+  """
+  conf_runs = 60
+  # Float divisor: with an integer runtime, Python 2 would otherwise floor the hours.
+  seconds_per_hour = 60.0 * 60.0
+  fed_time = (bench.runtime * 100) + (bench.fed_runs * conf_runs * bench.runtime)
+  fed_time_hrs = fed_time / seconds_per_hour
+  full_time = (bench.runtime * 1000) + (bench.full_runs * conf_runs * bench.runtime)
+  full_time_hrs = full_time / seconds_per_hour
+  print ("fedtime_hrs = ", fed_time_hrs, " full_time_hrs = ", full_time_hrs, "\n")
+if __name__ == "__main__":
+  # (runtime, fed_runs, full_runs) for each benchmark, in reporting order.
+  benchmarks = [
+    (8, 3, 5),        # resnet
+    (7.8, 47, 274),   # alexnet
+    (2.3, 62, 339),   # alexnet2
+    (7.4, 15, 211),   # vgg1
+    (15.4, 8, 150),   # vgg2
+    (0.98, 64, 228),  # lenet
+    (11, 32, 267),    # mobilenet
+  ]
+  for runtime, fed_runs, full_runs in benchmarks:
+    conf = Config()
+    conf.runtime = runtime
+    conf.fed_runs = fed_runs
+    conf.full_runs = full_runs
+    computeTimes(conf)
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/CMakeLists.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/CMakeLists.txt
+# Configures CUDA and builds the tensor_runtime / tensor_cpu_runtime libraries
+# that the autogenerated knob executables link against.
+cmake_minimum_required (VERSION 2.6)
+project (cudnn-training)
+find_package(CUDA 6.5 REQUIRED)
+# Root of hpvm-tensor-rt, i.e. the parent of this code_autogenerators directory.
+# Derived from the source tree so the build no longer depends on one
+# developer's checkout location.
+get_filename_component(TENSOR_RT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE)
+# nvcc flags: compute capability 6.0 (SASS + PTX); debug info only in Debug builds.
+if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+  message("Debug mode")
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-g;-lineinfo;-Xcompiler;-ggdb;-lcurand)
+else()
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand)
+endif()
+set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+# NOTE(review): "-I/" puts the filesystem root on the include path; it looks
+# like a leftover from an empty variable expansion -- confirm before removing.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11  -I/  " )
+add_definitions(-DNO_INJECTION)
+add_definitions(-DPROMISE_TUNER_ENABLED)
+if(USE_GFLAGS)
+  add_definitions(-DUSE_GFLAGS)
+endif()
+# USE_AUTOTUNER withdraws the NO_INJECTION definition added above.
+if(USE_AUTOTUNER)
+  remove_definitions(-DNO_INJECTION)
+endif()
+# cuDNN is located through the CUDNN_PATH environment variable, read at
+# configure time and used as given (no checkout prefix prepended).
+include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include)
+include_directories("${TENSOR_RT_ROOT}/tensor_runtime/include")
+include_directories("${TENSOR_RT_ROOT}/../gpu_profiler/include")
+include_directories("${TENSOR_RT_ROOT}/../soc_simulator/include")
+link_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64)
+cuda_add_library(tensor_runtime "${TENSOR_RT_ROOT}/tensor_runtime/src/tensor_runtime.cu")
+cuda_add_cublas_to_target(tensor_runtime)
+cuda_add_library(tensor_cpu_runtime "${TENSOR_RT_ROOT}/tensor_runtime/src/tensor_cpu_runtime.cc")
+# Static profiler/simulator archives are looked up in sibling project directories.
+find_library(GPU_PROFILER_LIB
+    NAMES libgpu_profiler.a
+    HINTS "${TENSOR_RT_ROOT}/../gpu_profiler/lib"
+)
+find_library(SOC_SIMULATOR_LIB
+    NAMES libpromise_profiler.a
+    HINTS "${TENSOR_RT_ROOT}/../soc_simulator/lib"
+)
+if(USE_GFLAGS)
+  target_link_libraries(tensor_runtime gflags cudnn -lcurand)
+else()
+  target_link_libraries(tensor_runtime cudnn -lcurand)
+endif()
+target_link_libraries(tensor_cpu_runtime)
+# lenet_keras_half_autogenerated_knobs
+# One fp16 LeNet executable per knob id; each links the same runtime and profiler libraries.
+foreach(knob perf20 perf26 perf22 perf25 perf23 samp33 perf24 samp31 perf30 samp36 perf21 samp34 samp32 samp35 perf29 perf27 perf28)
+  add_executable(lenet_keras_fp16_${knob} lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_${knob}.cc)
+  target_link_libraries(lenet_keras_fp16_${knob} tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+endforeach()
+# lenet_keras_autogenerated_knobs
+add_executable(lenet_keras_fp32_perf20 lenet_keras_autogenerated_knobs/lenet_keras_fp32_perf20.cc)
+target_link_libraries(lenet_keras_fp32_perf20 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16.txt
+../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
+../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
+../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
+../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
+../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
+../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
+../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_first_three.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_first_three.txt
+../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
+../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
+../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_remainder.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_remainder.txt
+../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
+../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
+../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
+../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_sources.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp16_sources.txt
+../dnn_sources/src/half/lenet_keras_half.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32.txt
+../dnn_sources/src/profiling/alexnet2_profiling.cc
+../dnn_sources/src/profiling/alexnet_cifar10_profiling.cc
+../dnn_sources/src/profiling/mobilenet_cifar10_profiling.cc
+../dnn_sources/src/profiling/mobilenet_shallow_profiling.cc
+../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
+../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
+../dnn_sources/src/profiling/resnet18_cifar10_profiling.cc
+../dnn_sources/src/profiling/vgg16_cifar100_profiling.cc
+../dnn_sources/src/profiling/vgg16_cifar10_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32_sources.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32_sources.txt
+../dnn_sources/src/lenet_keras.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32_test.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_fp32_test.txt
+../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_mobilenet_depth.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_mobilenet_depth.txt
+../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
+../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_one_file.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/filenames_one_file.txt
+../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
+../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16.txt
+perf,20 1,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,21 1,2,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,22 1,2,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,23 1,3,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,24 1,3,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,25 1,3,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,26 2,1,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,27 2,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,28 3,1,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,29 3,1,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,30 3,1,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,31 1,1,2,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,33 1,1,4,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,34 1,1,4,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,35 1,1,4,2     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_knobs_31_36.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_knobs_31_36.txt
+samp,31 1,1,2,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,33 1,1,4,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,34 1,1,4,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,35 1,1,4,2     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_old.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_old.txt
+perf,20 1,1,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,21 1,2,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,22 1,2,1   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,23 1,3,0   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,24 1,3,1   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,25 1,3,2   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,26 2,1,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,27 2,1,1   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,28 3,1,0   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,29 3,1,1   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+perf,30 3,1,2   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
+samp,31 2,0     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,32 2,1     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,33 4,0     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,34 4,1     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,35 4,2     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,36 4,3     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,37 1,1     1.88    tensorHalfConvolution   tensorConvInputHalf
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_samp.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_samp.txt
+samp,31 2,0     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,32 2,1     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,33 4,0     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,34 4,1     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,35 4,2     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,36 4,3     1.88    tensorHalfConvolution   tensorConvInputHalf
+samp,37 1,1     1.88    tensorHalfConvolution   tensorConvInputHalf
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_vgg16.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp16_vgg16.txt
+perf,20 1,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,21 1,2,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,22 1,2,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,23 1,3,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,24 1,3,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,25 1,3,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,26 2,1,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,27 2,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
+perf,28 3,1,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,29 3,1,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
+perf,30 3,1,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
+samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32.txt
+perf,20 1,1,1,1   2.25    tensorConvolution   tensorConvApprox
+perf,21 1,2,1,0   2.25    tensorConvolution   tensorConvApprox
+perf,22 1,2,1,1   2.25    tensorConvolution   tensorConvApprox
+perf,23 1,3,1,0   1.88    tensorConvolution   tensorConvApprox
+perf,24 1,3,1,1   1.88    tensorConvolution   tensorConvApprox
+perf,25 1,3,1,2   1.88    tensorConvolution   tensorConvApprox
+perf,26 2,1,1,0   2.25    tensorConvolution   tensorConvApprox
+perf,27 2,1,1,1   2.25    tensorConvolution   tensorConvApprox
+perf,28 3,1,1,0   1.88    tensorConvolution   tensorConvApprox
+perf,29 3,1,1,1   1.88    tensorConvolution   tensorConvApprox
+perf,30 3,1,1,2   1.88    tensorConvolution   tensorConvApprox
+samp,31 1,1,2,0     1.88    tensorConvolution   tensorConvApprox
+samp,32 1,1,2,1     1.88    tensorConvolution   tensorConvApprox
+samp,33 1,1,4,0     1.88    tensorConvolution   tensorConvApprox
+samp,34 1,1,4,1     1.88    tensorConvolution   tensorConvApprox
+samp,35 1,1,4,2     1.88    tensorConvolution   tensorConvApprox
+samp,36 1,1,4,3     1.88    tensorConvolution   tensorConvApprox
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32_baseline.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32_baseline.txt
+perf,20 1,1,1,1   2.25    tensorConvolution   tensorConvApprox
--- a/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32_old.txt
+++ b/llvm/projects/hpvm-tensor-rt/code_autogenerators/knob_config_fp32_old.txt
+perf,20 1,1,0   2.25    tensorConvolution   tensorConvPerfCuda
+perf,21 1,2,0   2.25    tensorConvolution   tensorConvPerfCuda
+perf,22 1,2,1   2.25    tensorConvolution   tensorConvPerfCuda
+perf,23 1,3,0   1.88    tensorConvolution   tensorConvPerfCuda
+perf,24 1,3,1   1.88    tensorConvolution   tensorConvPerfCuda
+perf,25 1,3,2   1.88    tensorConvolution   tensorConvPerfCuda
+perf,26 2,1,0   2.25    tensorConvolution   tensorConvPerfCuda
+perf,27 2,1,1   2.25    tensorConvolution   tensorConvPerfCuda
+perf,28 3,1,0   1.88    tensorConvolution   tensorConvPerfCuda
+perf,29 3,1,1   1.88    tensorConvolution   tensorConvPerfCuda
+perf,30 3,1,2   1.88    tensorConvolution   tensorConvPerfCuda