Skip to content
Snippets Groups Projects
Commit bfce1048 authored by kotsifa2's avatar kotsifa2
Browse files

Merge branch 'approx_hpvm' of gitlab.engr.illinois.edu:llvm/hpvm into approx_hpvm

parents f026fa96 4c81ec1b
No related branches found
No related tags found
No related merge requests found
......@@ -27,17 +27,37 @@ def parse_binary_output(proc_output):
return avg_time
# Input: a list of tuples of benchmark names
# Can change to input a file containing benchmarks to run
def run_benchmarks(builds_dir, output_filename, should_print_bin_output = True):
output_file = open(output_filename, "w")
def get_sorted_binaries(builds_dir):
    """Group the profiling binaries found in builds_dir by network name.

    A file is treated as a profiling binary if its name contains the
    substring "profiling". The network name is everything before the
    last '_' (the trailing component is the knob id), so e.g.
    "alexnet_profiling_3" is grouped under "alexnet_profiling".

    Returns a defaultdict mapping network name -> list of binary file
    names. The lists are NOT sorted here; run_benchmarks() sorts them
    by knob id before executing.
    """
    # dict of network names to lists of binaries
    network_bins = defaultdict(list)
    for bin_name in os.listdir(builds_dir):
        # Skip anything that is not a profiling binary.
        if bin_name.find("profiling") == -1:
            continue
        # Strip the trailing "_<knob id>" to recover the network name.
        network_name = bin_name[: bin_name.rfind("_")]
        network_bins[network_name].append(bin_name)
    return network_bins
# Input: a list of tuples of benchmark names
# Can change to input a file containing benchmarks to run
def run_benchmarks(sorted_bins, builds_dir, output_filename, should_print_bin_output = False):
    """Run every profiling binary, one network at a time, in knob-id order.

    sorted_bins:      dict of network name -> list of binary names, as
                      produced by get_sorted_binaries(). Each list is
                      sorted in place by knob id.
    builds_dir:       directory containing the binaries (currently only
                      used by the disabled execution path below).
    output_filename:  results file; opened line-buffered so partial
                      results survive an interrupted benchmark run.
    should_print_bin_output: forwarded to the (disabled) run_benchmark call.
    """
    def get_knob_id(bin_name):
        # Knob id is the integer after the last '_' in the binary name.
        return int(bin_name[bin_name.rfind("_") + 1 :])

    # NOTE: buffering=0 (unbuffered) is only valid for binary-mode files
    # in Python 3; line buffering keeps the original flush-as-we-go intent.
    # The with-statement guarantees the file is closed even on error.
    with open(output_filename, "w", buffering = 1) as output_file:
        for network_name in sorted_bins:
            # Sort the binaries in order by knob id
            sorted_bins[network_name].sort(key = get_knob_id)
            print("--------------------------------------")
            print(network_name)
            # Go through all binaries
            for bin_name in sorted_bins[network_name]:
                print(bin_name)
                output_file.write("%s results\n" % bin_name)
                # TODO(review): actual benchmark execution is disabled in this
                # commit; re-enable once timing results are wanted again:
                # output_file.write("%s: %s\n" % (bin_name,
                #     parse_binary_output(run_benchmark(
                #         os.path.join(builds_dir, bin_name),
                #         should_print_bin_output))))
            print("--------------------------------------\n")
......@@ -48,4 +68,5 @@ if __name__ == "__main__":
print("Usage: python online_benchmark_testing_automator.py <builds dir> <outputs_file_name>")
exit(1)
print("Output file name: %s" % sys.argv[2])
run_benchmarks(sys.argv[1], sys.argv[2])
sorted_bins = get_sorted_binaries(sys.argv[1])
run_benchmarks(sorted_bins, sys.argv[1], sys.argv[2])
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
# NOTE: can configure build directory
#HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_hpvm/
HPVM_BUILD_DIR = $(LLVM_BUILD_ROOT)
CC = $(HPVM_BUILD_DIR)/bin/clang++
......@@ -15,9 +16,10 @@ APP = alexnet
TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a
PROFILER_LIB_DIR = $(LLVM_SRC_ROOT)/projects/gpu_profiler/lib/libgpu_profiler.a
SOC_SIMULATOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/soc_simulator/lib/libpromise_profiler.a
TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
CCFLAGS += -DDEVICE=CUDNN_TARGET
LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL
......@@ -58,15 +60,17 @@ $(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll
$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_promise.ll -S -o $(BUILD_DIR)/$(APP)_promise.visc.ll
$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_loop.ll -S -o $(BUILD_DIR)/$(APP)_loop.visc.ll
$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_cudnn.bc
$(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc
$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP)_loop.visc.ll -o $(BUILD_DIR)/$(APP)_loop.bc
#$(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc
$(OPT) $(VISC_OPTFLAGS3) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_wrapperapi.bc
$(OPT) $(VISC_OPTFLAGS3) $(BUILD_DIR)/$(APP)_loop.visc.ll -o $(BUILD_DIR)/$(APP)_loop_wrapperapi.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_cudnn.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_cudnn_linked.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_loop.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_loop_linked.bc
$(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS)
$(CC) $(BUILD_DIR)/$(APP)_promise_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS)
$(CC) $(BUILD_DIR)/$(APP)_loop_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_loop_linked $(LINKER_FLAGS)
#$(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/lenet_tune $(LINKER_FLAGS)
#$(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapperapi.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapperapi_linked.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_loop_wrapperapi.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked.bc
$(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS)
#$(CC) $(BUILD_DIR)/$(APP)_promise_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS)
$(CC) $(BUILD_DIR)/$(APP)_wrapperapi_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_wrapperapi_linked $(LINKER_FLAGS)
$(CC) $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked.bc $(TENSOR_LIB_DIR) $(PROFILER_LIB_DIR) $(SOC_SIMULATOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_loop_wrapperapi_linked $(LINKER_FLAGS)
$(BUILD_DIR):
mkdir -p $@
......
......@@ -9,8 +9,10 @@
#include <tensorTypes.h>
#include <tensorUtils.h>
void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1);
......@@ -18,7 +20,7 @@ void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -26,7 +28,7 @@ void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_2_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
......@@ -34,7 +36,7 @@ void var_2_node(void* t1, size_t bytes_t1) {
}
void var_3_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
......@@ -42,7 +44,7 @@ void var_3_node(void* t1, size_t bytes_t1) {
}
void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
......@@ -50,7 +52,7 @@ void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -58,7 +60,7 @@ void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_6_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
......@@ -66,7 +68,7 @@ void var_6_node(void* t1, size_t bytes_t1) {
}
void var_7_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
......@@ -74,7 +76,7 @@ void var_7_node(void* t1, size_t bytes_t1) {
}
void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
......@@ -82,7 +84,7 @@ void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -90,7 +92,7 @@ void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_10_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
......@@ -98,7 +100,7 @@ void var_10_node(void* t1, size_t bytes_t1) {
}
void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
......@@ -106,7 +108,7 @@ void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -114,7 +116,7 @@ void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_13_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
......@@ -122,7 +124,7 @@ void var_13_node(void* t1, size_t bytes_t1) {
}
void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
......@@ -130,7 +132,7 @@ void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -138,7 +140,7 @@ void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_16_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
......@@ -146,7 +148,7 @@ void var_16_node(void* t1, size_t bytes_t1) {
}
void var_17_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
......@@ -154,7 +156,7 @@ void var_17_node(void* t1, size_t bytes_t1) {
}
void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_mul(t1, t2);
......@@ -162,7 +164,7 @@ void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
}
void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
......@@ -177,6 +179,8 @@ void var_20_node(void* t1, size_t bytes_t1) {
__visc__return(2, r, (size_t) 0);
}
void root(void* input, size_t input_bytes,
void* conv2d_1_w, size_t conv2d_1_w_bytes,
void* conv2d_1_b, size_t conv2d_1_b_bytes,
......@@ -371,9 +375,10 @@ int main(){
std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10_test/");
std::string input_path = dir_prefix + std::string("input.bin");
//void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32);
std::string labels_path = dir_prefix + std::string("labels32.bin");
//uint8_t* labels = readLabels(labels_path.c_str(),10000);
uint8_t* labels = readLabels(labels_path.c_str(),5000);
std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11);
std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
......@@ -404,6 +409,8 @@ int main(){
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
//args->input = input;
//args->input_bytes = 0;
args->conv2d_1_w = conv2d_1_w;
args->conv2d_1_w_bytes = 0;
args->conv2d_1_b = conv2d_1_b;
......@@ -429,48 +436,38 @@ int main(){
args->dense_1_b = dense_1_b;
args->dense_1_b_bytes = 0;
int batch_size = 500;
int test_input_size = 10000;
int batch_count = test_input_size / batch_size;
std::string input_path = dir_prefix + std::string("input.bin");
int batch_size = 500;
int test_input_size = 10000;
int batch_count = test_input_size / batch_size;
void* input = create4DTensor(0,nchw,batch_size,3,32,32);
startMemTracking();
for (int i = 0; i < batch_count; i++){
startProfiling();
int start = i * batch_size;
int end = (i + 1) * batch_size;
for (int i = 0; i < batch_count; i++){
int start = i * batch_size;
int end = (i + 1) * batch_size;
copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
args->input = input;
args->input = input;
args->input_bytes = 0;
//void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32);
void* dfg = __visc__launch(0, root, (void*) args);
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end);
computeAccuracy3(labels, result);
llvm_hpvm_invokeRtControl2(result, labels);
freeBatchMemory();
}
__visc__cleanup();
stopProfiling();
__visc__cleanup();
return 0;
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment