Skip to content
Snippets Groups Projects
Commit eebf5076 authored by hsharif3's avatar hsharif3
Browse files

Merge branch 'approx_hpvm' of https://gitlab.engr.illinois.edu/llvm/hpvm into approx_hpvm

parents 941f8321 9286101b
No related branches found
No related tags found
No related merge requests found
Showing
with 318 additions and 1 deletion
......@@ -31,7 +31,7 @@ Build hpvm
```shell
mkdir install
mkdir build && cd build
cmake ../llvm -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" -DCMAKE_INSTALL_PREFIX=../install
cmake ../llvm -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="X86" -DCMAKE_INSTALL_PREFIX=../install
make -j<number of threads you want to use to build hpvm>
```
......
class Config:
    """Timing inputs for one benchmark, consumed by computeTimes.

    Attributes:
        runtime: Multiplied into every term of the estimate; computeTimes
            divides the result by 60*60 and labels it hours, so this is
            presumably seconds per run (TODO confirm).
        fed_runs: Run count used for the "fed" estimate.
        full_runs: Run count used for the "full" estimate.

    All three default to 0, so ``Config()`` followed by attribute
    assignment keeps working exactly as before.
    """

    def __init__(self, runtime=0, fed_runs=0, full_runs=0):
        self.runtime = runtime
        self.fed_runs = fed_runs
        self.full_runs = full_runs
def computeTimes(bench, conf_runs=60):
    """Print and return the estimated "fed" and "full" times, in hours.

    Each estimate is a fixed number of baseline runs (100 for "fed", 1000 for
    "full") plus ``conf_runs`` runs for every fed/full run, all scaled by
    ``bench.runtime``.

    Args:
        bench: Object exposing ``runtime``, ``fed_runs`` and ``full_runs``.
        conf_runs: Multiplier applied to ``fed_runs`` / ``full_runs``.
            Defaults to 60, the value that used to be hard-coded.

    Returns:
        Tuple ``(fed_time_hrs, full_time_hrs)``.
    """
    # The totals are divided by 60*60 and reported as hours; the float divisor
    # keeps the result fractional even under Python 2 integer division.
    seconds_per_hour = 60 * 60.0

    fed_time = (bench.runtime * 100) + (bench.fed_runs * conf_runs * bench.runtime)
    fed_time_hrs = fed_time / seconds_per_hour

    full_time = (bench.runtime * 1000) + (bench.full_runs * conf_runs * bench.runtime)
    full_time_hrs = full_time / seconds_per_hour

    print ("fedtime_hrs = ", fed_time_hrs, " full_time_hrs = ", full_time_hrs, "\n")
    return fed_time_hrs, full_time_hrs
if __name__ == "__main__":
    # One row per benchmark: (runtime, fed_runs, full_runs).
    # Rows are reported in the order listed.
    benchmark_inputs = [
        (8, 3, 5),         # resnet
        (7.8, 47, 274),    # alexnet
        (2.3, 62, 339),    # alexnet2
        (7.4, 15, 211),    # vgg1
        (15.4, 8, 150),    # vgg2
        (0.98, 64, 228),   # lenet
        (11, 32, 267),     # mobilenet
    ]

    for run_time, fed_count, full_count in benchmark_inputs:
        bench = Config()
        bench.runtime = run_time
        bench.fed_runs = fed_count
        bench.full_runs = full_count
        computeTimes(bench)
# Project-wide settings: CUDA discovery, nvcc and host compiler flags, and the
# preprocessor definitions applied to every target declared after this point.

# The minimum stays at 2.6. The "...3.10" upper bound raises the policy version
# so that CMake releases which reject policy versions below 3.5 still accept
# this file; CMake older than 3.12 ignores the upper bound.
cmake_minimum_required (VERSION 2.6...3.10)
project (cudnn-training)

find_package(CUDA 6.5 REQUIRED)

# nvcc options shared by every build type: code generation for compute
# capability 6.0 (sm_60 binary plus compute_60 PTX) and C++11.
# -lcurand is intentionally absent: it is a linker option, and curand is linked
# through target_link_libraries on tensor_runtime.
list(APPEND CUDA_NVCC_FLAGS
  -gencode arch=compute_60,code=sm_60
  -gencode arch=compute_60,code=compute_60
  -std=c++11)

if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  message("Debug mode")
  # Debug info for nvcc (-g, -lineinfo) and for the host compiler it drives.
  list(APPEND CUDA_NVCC_FLAGS -g -lineinfo -Xcompiler -ggdb)
else()
  # Every other build type (including an empty CMAKE_BUILD_TYPE) defines
  # NDEBUG for nvcc and for the host compiler it drives.
  list(APPEND CUDA_NVCC_FLAGS -DNDEBUG -Xcompiler -DNDEBUG)
endif()

# Do not forward CMAKE_CXX_FLAGS to nvcc's host compiler; nvcc only receives
# what is listed in CUDA_NVCC_FLAGS above.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# NOTE(review): "-I/" adds the filesystem root as an include directory; kept
# as-is because sources may rely on it - confirm whether it is still needed.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -I/ " )

# NO_INJECTION is defined for every build except when USE_AUTOTUNER is set.
if(NOT USE_AUTOTUNER)
  add_definitions(-DNO_INJECTION)
endif()

add_definitions(-DPROMISE_TUNER_ENABLED)

if(USE_GFLAGS)
  add_definitions(-DUSE_GFLAGS)
endif()
# Root of the hpvm-tensor-rt checkout. The default is the location this file
# was written for; pass -DHPVM_TENSOR_RT_ROOT=<path> to build elsewhere.
set(HPVM_TENSOR_RT_ROOT "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt"
    CACHE PATH "Root directory of the hpvm-tensor-rt sources")

# Header search paths: cuDNN (located through the CUDNN_PATH environment
# variable, which is read at configure time), the tensor runtime, and the
# gpu_profiler / soc_simulator projects that sit next to hpvm-tensor-rt.
# NOTE(review): CUDNN_PATH is used both on its own and appended to the root,
# so only one of the two forms can be valid for a given value - confirm which
# is intended.
include_directories(
  $ENV{CUDNN_PATH}
  ${HPVM_TENSOR_RT_ROOT}/$ENV{CUDNN_PATH}/include
  ${HPVM_TENSOR_RT_ROOT}/tensor_runtime/include
  ${HPVM_TENSOR_RT_ROOT}/../gpu_profiler/include
  ${HPVM_TENSOR_RT_ROOT}/../soc_simulator/include)

# Library search paths for cuDNN.
link_directories(
  $ENV{CUDNN_PATH}
  ${HPVM_TENSOR_RT_ROOT}/$ENV{CUDNN_PATH}/lib
  ${HPVM_TENSOR_RT_ROOT}/$ENV{CUDNN_PATH}/lib64)

# Runtime built from the .cu source, linked with cuBLAS.
cuda_add_library(tensor_runtime
  ${HPVM_TENSOR_RT_ROOT}/tensor_runtime/src/tensor_runtime.cu)
cuda_add_cublas_to_target(tensor_runtime)

# Runtime built from the .cc source; it links no extra libraries.
cuda_add_library(tensor_cpu_runtime
  ${HPVM_TENSOR_RT_ROOT}/tensor_runtime/src/tensor_cpu_runtime.cc)

# Prebuilt static profiler archives linked into the benchmark executables.
find_library(GPU_PROFILER_LIB
  NAMES libgpu_profiler.a
  HINTS ${HPVM_TENSOR_RT_ROOT}/../gpu_profiler/lib
)
if(NOT GPU_PROFILER_LIB)
  message(FATAL_ERROR
    "libgpu_profiler.a not found under ${HPVM_TENSOR_RT_ROOT}/../gpu_profiler/lib")
endif()

find_library(SOC_SIMULATOR_LIB
  NAMES libpromise_profiler.a
  HINTS ${HPVM_TENSOR_RT_ROOT}/../soc_simulator/lib
)
if(NOT SOC_SIMULATOR_LIB)
  message(FATAL_ERROR
    "libpromise_profiler.a not found under ${HPVM_TENSOR_RT_ROOT}/../soc_simulator/lib")
endif()

# gflags is linked only when requested and stays ahead of cudnn/curand on the
# link line. The keyword-less signature is kept on purpose: the FindCUDA
# helpers above presumably link tensor_runtime the same way, and CMake does
# not allow mixing the two signatures on one target - verify before adding
# PRIVATE/PUBLIC here.
if(USE_GFLAGS)
  target_link_libraries(tensor_runtime gflags)
endif()
target_link_libraries(tensor_runtime cudnn -lcurand)
# Benchmark executables. Each one is built from a single source file named
# after the target and linked against tensor_runtime and the two profiler
# archives located by find_library.

# lenet_keras_half_autogenerated_knobs
foreach(knob_id
    perf20 perf26 perf22 perf25 perf23 samp33
    perf24 samp31 perf30 samp36 perf21 samp34
    samp32 samp35 perf29 perf27 perf28)
  add_executable(lenet_keras_fp16_${knob_id}
    lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_${knob_id}.cc)
  target_link_libraries(lenet_keras_fp16_${knob_id}
    tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
endforeach()

# lenet_keras_autogenerated_knobs
foreach(knob_id perf20)
  add_executable(lenet_keras_fp32_${knob_id}
    lenet_keras_autogenerated_knobs/lenet_keras_fp32_${knob_id}.cc)
  target_link_libraries(lenet_keras_fp32_${knob_id}
    tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
endforeach()
../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
../dnn_sources/src/half/lenet_keras_half.cc
../dnn_sources/src/profiling/alexnet2_profiling.cc
../dnn_sources/src/profiling/alexnet_cifar10_profiling.cc
../dnn_sources/src/profiling/mobilenet_cifar10_profiling.cc
../dnn_sources/src/profiling/mobilenet_shallow_profiling.cc
../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
../dnn_sources/src/profiling/resnet18_cifar10_profiling.cc
../dnn_sources/src/profiling/vgg16_cifar100_profiling.cc
../dnn_sources/src/profiling/vgg16_cifar10_profiling.cc
../dnn_sources/src/lenet_keras.cc
../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
perf,20 1,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,21 1,2,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,22 1,2,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,23 1,3,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,24 1,3,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,25 1,3,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,26 2,1,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,27 2,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,28 3,1,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,29 3,1,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,30 3,1,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,31 1,1,2,0 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,33 1,1,4,0 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,34 1,1,4,1 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,35 1,1,4,2 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,31 1,1,2,0 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,33 1,1,4,0 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,34 1,1,4,1 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,35 1,1,4,2 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,20 1,1,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf
perf,21 1,2,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf
perf,22 1,2,1 2.25 tensorHalfConvolution tensorConvPerfCudaHalf
perf,23 1,3,0 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
perf,24 1,3,1 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
perf,25 1,3,2 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
perf,26 2,1,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf
perf,27 2,1,1 2.25 tensorHalfConvolution tensorConvPerfCudaHalf
perf,28 3,1,0 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
perf,29 3,1,1 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
perf,30 3,1,2 1.88 tensorHalfConvolution tensorConvPerfCudaHalf
samp,31 2,0 1.88 tensorHalfConvolution tensorConvInputHalf
samp,32 2,1 1.88 tensorHalfConvolution tensorConvInputHalf
samp,33 4,0 1.88 tensorHalfConvolution tensorConvInputHalf
samp,34 4,1 1.88 tensorHalfConvolution tensorConvInputHalf
samp,35 4,2 1.88 tensorHalfConvolution tensorConvInputHalf
samp,36 4,3 1.88 tensorHalfConvolution tensorConvInputHalf
samp,37 1,1 1.88 tensorHalfConvolution tensorConvInputHalf
samp,31 2,0 1.88 tensorHalfConvolution tensorConvInputHalf
samp,32 2,1 1.88 tensorHalfConvolution tensorConvInputHalf
samp,33 4,0 1.88 tensorHalfConvolution tensorConvInputHalf
samp,34 4,1 1.88 tensorHalfConvolution tensorConvInputHalf
samp,35 4,2 1.88 tensorHalfConvolution tensorConvInputHalf
samp,36 4,3 1.88 tensorHalfConvolution tensorConvInputHalf
samp,37 1,1 1.88 tensorHalfConvolution tensorConvInputHalf
perf,20 1,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,21 1,2,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,22 1,2,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,23 1,3,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,24 1,3,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,25 1,3,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,26 2,1,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,27 2,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf
perf,28 3,1,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,29 3,1,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,30 3,1,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf
samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf
perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox
perf,21 1,2,1,0 2.25 tensorConvolution tensorConvApprox
perf,22 1,2,1,1 2.25 tensorConvolution tensorConvApprox
perf,23 1,3,1,0 1.88 tensorConvolution tensorConvApprox
perf,24 1,3,1,1 1.88 tensorConvolution tensorConvApprox
perf,25 1,3,1,2 1.88 tensorConvolution tensorConvApprox
perf,26 2,1,1,0 2.25 tensorConvolution tensorConvApprox
perf,27 2,1,1,1 2.25 tensorConvolution tensorConvApprox
perf,28 3,1,1,0 1.88 tensorConvolution tensorConvApprox
perf,29 3,1,1,1 1.88 tensorConvolution tensorConvApprox
perf,30 3,1,1,2 1.88 tensorConvolution tensorConvApprox
samp,31 1,1,2,0 1.88 tensorConvolution tensorConvApprox
samp,32 1,1,2,1 1.88 tensorConvolution tensorConvApprox
samp,33 1,1,4,0 1.88 tensorConvolution tensorConvApprox
samp,34 1,1,4,1 1.88 tensorConvolution tensorConvApprox
samp,35 1,1,4,2 1.88 tensorConvolution tensorConvApprox
samp,36 1,1,4,3 1.88 tensorConvolution tensorConvApprox
perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox
perf,20 1,1,0 2.25 tensorConvolution tensorConvPerfCuda
perf,21 1,2,0 2.25 tensorConvolution tensorConvPerfCuda
perf,22 1,2,1 2.25 tensorConvolution tensorConvPerfCuda
perf,23 1,3,0 1.88 tensorConvolution tensorConvPerfCuda
perf,24 1,3,1 1.88 tensorConvolution tensorConvPerfCuda
perf,25 1,3,2 1.88 tensorConvolution tensorConvPerfCuda
perf,26 2,1,0 2.25 tensorConvolution tensorConvPerfCuda
perf,27 2,1,1 2.25 tensorConvolution tensorConvPerfCuda
perf,28 3,1,0 1.88 tensorConvolution tensorConvPerfCuda
perf,29 3,1,1 1.88 tensorConvolution tensorConvPerfCuda
perf,30 3,1,2 1.88 tensorConvolution tensorConvPerfCuda
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment