diff --git a/llvm/projects/hpvm-tensor-rt/.gitignore b/llvm/projects/hpvm-tensor-rt/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9581f1d5b06aaafaaae77ef6175bc243707e4685 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/.gitignore @@ -0,0 +1 @@ +./build \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..28ca6bb639731444c030018c4abc726e8d3b597f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required (VERSION 2.6) +project (cudnn-training) + +find_package(CUDA 6.5 REQUIRED) + + +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + message("Debug mode") + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-g;-lineinfo;-Xcompiler;-ggdb;-lcurand) +else() + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-O3;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand) +endif() + +set(CUDA_PROPAGATE_HOST_FLAGS OFF) + +# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 " ) + +add_definitions(-DNO_INJECTION) +if(USE_GFLAGS) + add_definitions(-DUSE_GFLAGS) +endif() + +include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include) +link_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64) + + +# Adding new rule for building a cuDNN runtime library +cuda_add_library(tensor_runtime tensor_runtime/src/tensor_runtime.cu) +cuda_add_cublas_to_target(tensor_runtime) + + +if(USE_GFLAGS) + target_link_libraries(tensor_runtime gflags cudnn -lcurand) +else() + target_link_libraries(tensor_runtime cudnn -lcurand) +endif() + + + +# Adding rule for the 
debugging source +add_executable(test_ops dnn_sources/src/test_ops.cc) +target_link_libraries(test_ops tensor_runtime) + + +# Full-Precision versions +add_executable(fc2_clipped dnn_sources/src/fc2_clipped.cc) +target_link_libraries(fc2_clipped tensor_runtime) + +add_executable(fc3_clipped dnn_sources/src/fc3_clipped.cc) +target_link_libraries(fc3_clipped tensor_runtime) + +add_executable(fc4_clipped dnn_sources/src/fc4_clipped.cc) +target_link_libraries(fc4_clipped tensor_runtime) + +add_executable(lenet_tanh dnn_sources/src/lenet2_tanh.cc) +target_link_libraries(lenet_tanh tensor_runtime) + +add_executable(lenet_keras dnn_sources/src/lenet_keras.cc) +target_link_libraries(lenet_keras tensor_runtime) + +add_executable(cifar_keras dnn_sources/src/cifar_keras.cc) +target_link_libraries(cifar_keras tensor_runtime) + + +# Half precision networks +add_executable(fc2_half dnn_sources/src/half/fc2_half.cc) +target_link_libraries(fc2_half tensor_runtime) + +add_executable(fc3_half dnn_sources/src/half/fc3_half.cc) +target_link_libraries(fc3_half tensor_runtime) + +add_executable(fc4_half dnn_sources/src/half/fc4_half.cc) +target_link_libraries(fc4_half tensor_runtime) + +add_executable(lenet_tanh_half dnn_sources/src/half/lenet_tanh_half.cc) +target_link_libraries(lenet_tanh_half tensor_runtime) + +add_executable(lenet_keras_half dnn_sources/src/half/lenet_keras_half.cc) +target_link_libraries(lenet_keras_half tensor_runtime) diff --git a/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py b/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py new file mode 100644 index 0000000000000000000000000000000000000000..74aa23c71aa3e81fc9422a3cc73ba3b69ed98c8a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py @@ -0,0 +1,125 @@ + +import argparse +import os +import subprocess +import sys + + +def getAccuracy(file_name): + + if not os.path.exists(file_name): + print("final_accuracy file not found ") + sys.exit(0) + + file = open(file_name, "r") + 
acc_str = file.read() + accuracy = float(acc_str) + print accuracy + return accuracy + + +total_runs = 12.0 +skip_lines = 0 + + +def test_func(): + print "test_func" + sys.exit(0) + + +def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): + + #total_runs = 100.0 + successful_runs = 0.0 + total_acc = 0 + + for i in range(int(total_runs)): + subprocess.call(binary_name) + accuracy = getAccuracy("final_accuracy") + total_acc += accuracy + + if accuracy > accuracy_threshold: + successful_runs += 1 + + confidence = (successful_runs / total_runs) * 100.0 + print("confidence = ", confidence) + avg_acc = total_acc / total_runs + print("average accuracy = ", avg_acc) + + return confidence, avg_acc + + +def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): + + confidence_list = [] + + if not os.path.exists(result_dir): + print("Path does not exist") + sys.exit(0) + + file_names = os.listdir(result_dir) + print file_names + + for file_name in file_names: + # Skip sub-directories + if os.path.isdir(result_dir + "/" + file_name): + continue + + f = open(result_dir + "/" + file_name) + tuner_file = open("opentuner_flags", "w+") + + index = 0 + results_str = "" + for x in f: + if index >= skip_lines: + error_knob = int(float(x.split()[1])) + print error_knob + tuner_file.write(str(error_knob) + "\n") + + results_str += x + index += 1 + + tuner_file.close() + + run_confidence, avg_accuracy = do_multiple_runs(binary, accuracy, confidence) + + if run_confidence > 90: + f2 = open(output_dir + "/" + file_name, "w+") + f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") + f2.write(results_str) + f2.close() + + conf_result = (run_confidence, avg_accuracy, file_name) + confidence_list.append(conf_result) + + return confidence_list + + +if __name__ == "__main__": + + argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on 
accuracy') + argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') + argparser.add_argument('--output-dir', help='Directory for storing output directory') + argparser.add_argument('--binary', help='Binary name to run') + argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') + argparser.add_argument('--confidence', type=float, help='Confidence threshold') + + + args = argparser.parse_args() + result_dir = args.result_dir + output_dir = args.output_dir + binary = args.binary + accuracy = args.accuracy + confidence = args.confidence + + confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) + #print confidence_list + + sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) + + output_file = open(output_dir + "/confidence_summary.txt", "w+") + for x in sorted_list: + output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") + + output_file.close() + diff --git a/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py b/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py new file mode 100644 index 0000000000000000000000000000000000000000..898b4c4f42211e010b1544039cbd4b4125c03b92 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py @@ -0,0 +1,89 @@ + + +import argparse +import sys +import os + + +log_index = 7 +linear_index = 8 +quad_index = 9 + +top_k = 10 + +def dump_results(sorted_list, k, result_dir, sub_dir): + + ref_dir = result_dir + "/" + sub_dir + if not os.path.exists(ref_dir): + os.mkdir(ref_dir) + + for i in range(k): + file_name = sorted_list[i][1] + file_name = ref_dir + "/" + file_name + "_rank_" + str(i) + f = open(file_name, "w+") + f.write(str(sorted_list[i][2]) + "\t") + f.write(str(sorted_list[i][3]) + "\t") + f.write(str(sorted_list[i][4]) + "\n") + f.write(sorted_list[i][0]) + f.close() + + + + +def select_top_results(result_dir): + + if not os.path.exists(result_dir): + print("Path does not 
exist") + sys.exit(0) + + file_names = os.listdir(result_dir) + print file_names + + results_arr = [] + + for file_name in file_names: + # Skip sub-directories + if os.path.isdir(result_dir + "/" + file_name): + continue + + log_result = 0.0 + linear_result = 0.0 + quad_result = 0.0 + file_str = "" + + f = open(result_dir + "/" + file_name) + for x in f: + words = x.split() + log_result += float(words[log_index]) + linear_result += float(words[linear_index]) + quad_result += float(words[quad_index]) + file_str += x + + + file_result = (file_str, file_name, log_result, linear_result, quad_result) + results_arr.append(file_result) + + + sorted_list = sorted(results_arr, key = lambda tup: tup[2]) + dump_results(sorted_list, top_k, result_dir, "log") + + sorted_list = sorted(results_arr, key = lambda tup: tup[3]) + dump_results(sorted_list, top_k, result_dir, "linear") + + sorted_list = sorted(results_arr, key = lambda tup: tup[4]) + dump_results(sorted_list, top_k, result_dir, "quad") + + + +if __name__ == "__main__": + + argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') + argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') + + args = argparser.parse_args() + result_dir = args.result_dir + + select_top_results(result_dir) + + + diff --git a/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh b/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh new file mode 100644 index 0000000000000000000000000000000000000000..58f16f20d0af12f041840b8037ae13e49c214ed4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh @@ -0,0 +1,5 @@ +#!/bin/bash +module load cuda-toolkit/8.0 +export CUDNN_PATH=/software/cuda-toolkit-8.0/lib64/ +export LIBRARY_PATH=$LIBRARY_PATH:/software/cuda-toolkit-8.0/lib64/ + diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh new file mode 100644 index 
0000000000000000000000000000000000000000..446481b79a47827bf47341ce9d14f15f57d26866 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +# Setting include path for Anaconda include files +export CPATH=$CPATH:/home/hsharif3/anaconda2/include/ +# Setting path for llvm/clang-4.0 build +export PATH=/home/hsharif3/Gitlab/llvm/llvm/build/bin/:$PATH + +export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/ + +export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/ diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh new file mode 100644 index 0000000000000000000000000000000000000000..47b446456e70d1cdcfc6f799ee51caca67fe857c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# CUDNN Path setup +module load cuda-toolkit/9.1 +export CUDNN_PATH=/software/cuda-9.1/lib64/ +export LIBRARY_PATH=$LIBRARY_PATH:/software/cuda-toolkit-9.1/lib64/ + +# HPVM Path setup +export CPATH=$CPATH:/home/hsharif3/anaconda2/include/ +export PATH=/home/hsharif3/Gitlab/llvm/llvm/build/bin/:$PATH +export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/ +export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5132c46fbb4539935b1c4c3bccff1d80679482a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt @@ -0,0 +1,514 @@ +# This is the CMakeCache file. +# For build in directory: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. 
+# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Path to a program. +CMAKE_AR:FILEPATH=/usr/bin/ar + +//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or +// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel. +CMAKE_BUILD_TYPE:STRING= + +//Enable/Disable color output during build. +CMAKE_COLOR_MAKEFILE:BOOL=ON + +//CXX compiler +CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ + +//Flags used by the compiler during all build types. +CMAKE_CXX_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release builds for minimum +// size. +CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds. +CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during release builds with debug info. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG + +//C compiler +CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc + +//Flags used by the compiler during all build types. +CMAKE_C_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_C_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release builds for minimum +// size. +CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds. +CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during release builds with debug info. +CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG + +//Flags used by the linker. +CMAKE_EXE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. 
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Enable/Disable output of compile commands during generation. +CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=OFF + +//Install path prefix, prepended onto install directories. +CMAKE_INSTALL_PREFIX:PATH=/usr/local + +//Path to a program. +CMAKE_LINKER:FILEPATH=/usr/bin/ld + +//Path to a program. +CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/make + +//Flags used by the linker during the creation of modules. +CMAKE_MODULE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_NM:FILEPATH=/usr/bin/nm + +//Path to a program. +CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy + +//Path to a program. +CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump + +//Value Computed by CMake +CMAKE_PROJECT_NAME:STATIC=cudnn-training + +//Path to a program. +CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib + +//Flags used by the linker during the creation of dll's. +CMAKE_SHARED_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. 
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If set, runtime paths are not added when installing shared libraries, +// but are added when building. +CMAKE_SKIP_INSTALL_RPATH:BOOL=NO + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Flags used by the linker during the creation of static libraries. +CMAKE_STATIC_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_STRIP:FILEPATH=/usr/bin/strip + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. +CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + +//Compile device code in 64 bit mode +CUDA_64_BIT_DEVICE_CODE:BOOL=ON + +//Attach the build rule to the CUDA source file. Enable only when +// the CUDA source file is added to at most one target. +CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE:BOOL=ON + +//Generate and parse .cubin files in Device mode. +CUDA_BUILD_CUBIN:BOOL=OFF + +//Build in Emulation mode +CUDA_BUILD_EMULATION:BOOL=OFF + +//"cudart" library +CUDA_CUDART_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcudart.so + +//"cuda" library (older versions only). +CUDA_CUDA_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/libcuda.so + +//Directory to put all the output files. 
If blank it will default +// to the CMAKE_CURRENT_BINARY_DIR +CUDA_GENERATED_OUTPUT_DIR:PATH= + +//Generated file extension +CUDA_HOST_COMPILATION_CPP:BOOL=ON + +//Host side compiler used by NVCC +CUDA_HOST_COMPILER:FILEPATH=/usr/bin/cc + +//Path to a program. +CUDA_NVCC_EXECUTABLE:FILEPATH=/software/cuda-9.1/bin/nvcc + +//Semi-colon delimit multiple arguments. +CUDA_NVCC_FLAGS:STRING= + +//Semi-colon delimit multiple arguments. +CUDA_NVCC_FLAGS_DEBUG:STRING= + +//Semi-colon delimit multiple arguments. +CUDA_NVCC_FLAGS_MINSIZEREL:STRING= + +//Semi-colon delimit multiple arguments. +CUDA_NVCC_FLAGS_RELEASE:STRING= + +//Semi-colon delimit multiple arguments. +CUDA_NVCC_FLAGS_RELWITHDEBINFO:STRING= + +//Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile +CUDA_PROPAGATE_HOST_FLAGS:BOOL=ON + +//Path to a file. +CUDA_SDK_ROOT_DIR:PATH=CUDA_SDK_ROOT_DIR-NOTFOUND + +//Compile CUDA objects with separable compilation enabled. Requires +// CUDA 5.0+ +CUDA_SEPARABLE_COMPILATION:BOOL=OFF + +//Specify the name of the class of CPU architecture for which the +// input files must be compiled. +CUDA_TARGET_CPU_ARCH:STRING= + +//Path to a file. +CUDA_TOOLKIT_INCLUDE:PATH=/software/cuda-9.1/include + +//Toolkit location. +CUDA_TOOLKIT_ROOT_DIR:PATH=/software/cuda-9.1 + +//Toolkit target location. +CUDA_TOOLKIT_TARGET_DIR:PATH=/software/cuda-9.1 + +//Use the static version of the CUDA runtime library if available +CUDA_USE_STATIC_CUDA_RUNTIME:BOOL=ON + +//Print out the commands run while compiling the CUDA source file. +// With the Makefile generator this defaults to VERBOSE variable +// specified on the command line, but can be forced on with this +// option. +CUDA_VERBOSE_BUILD:BOOL=OFF + +//Version of CUDA as computed from nvcc. 
+CUDA_VERSION:STRING=9.1 + +//"cublas" library +CUDA_cublas_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcublas.so + +//static CUDA runtime library +CUDA_cudart_static_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcudart_static.a + +//"cufft" library +CUDA_cufft_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcufft.so + +//"cupti" library +CUDA_cupti_LIBRARY:FILEPATH=/software/cuda-9.1/extras/CUPTI/lib64/libcupti.so + +//"curand" library +CUDA_curand_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcurand.so + +//"cusolver" library +CUDA_cusolver_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcusolver.so + +//"cusparse" library +CUDA_cusparse_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcusparse.so + +//"nppc" library +CUDA_nppc_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libnppc.so + +//"nppi" library +CUDA_nppi_LIBRARY:FILEPATH=CUDA_nppi_LIBRARY-NOTFOUND + +//"npps" library +CUDA_npps_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libnpps.so + +//Path to a library. +CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.so + +//Value Computed by CMake +cudnn-training_BINARY_DIR:STATIC=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +//Value Computed by CMake +cudnn-training_SOURCE_DIR:STATIC=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +//Dependencies for the target +tensor_runtime_LIB_DEPENDS:STATIC=general;/software/cuda-9.1/lib64/libcudart_static.a;general;-lpthread;general;dl;general;/usr/lib/x86_64-linux-gnu/librt.so;general;/software/cuda-9.1/lib64/libcublas.so;general;cudnn;general;-lcurand; + + +######################## +# INTERNAL cache entries +######################## + +//ADVANCED property for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//This is the directory where this CMakeCache.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 +//Minor version of cmake used to create 
the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=5 +//Patch version of cmake used to create the current loaded cache +CMAKE_CACHE_PATCH_VERSION:INTERNAL=1 +//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE +CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. +CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//ADVANCED property for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_COMPILER +CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS +CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG +CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL +CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE +CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO +CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG +CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED 
property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS +CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1 +//Name of external makefile project generator. +CMAKE_EXTRA_GENERATOR:INTERNAL= +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Unix Makefiles +//Name of generator platform. +CMAKE_GENERATOR_PLATFORM:INTERNAL= +//Name of generator toolset. +CMAKE_GENERATOR_TOOLSET:INTERNAL= +//Have symbol pthread_create +CMAKE_HAVE_LIBC_CREATE:INTERNAL= +//Have library pthreads +CMAKE_HAVE_PTHREADS_CREATE:INTERNAL= +//Have library pthread +CMAKE_HAVE_PTHREAD_CREATE:INTERNAL=1 +//Have include pthread.h +CMAKE_HAVE_PTHREAD_H:INTERNAL=1 +//Source directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt +//Install .so files without execute permission. 
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1 +//ADVANCED property for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MAKE_PROGRAM +CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_NM +CMAKE_NM-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1 +//ADVANCED property for variable: CMAKE_OBJCOPY +CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_OBJDUMP +CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RANLIB +CMAKE_RANLIB-ADVANCED:INTERNAL=1 +//Path to CMake installation. 
+CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.5 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH +CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS +CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG +CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE +CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STRIP +CMAKE_STRIP-ADVANCED:INTERNAL=1 +//uname command +CMAKE_UNAME:INTERNAL=/bin/uname +//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_64_BIT_DEVICE_CODE +CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1 +//List of intermediate files that are part of the cuda dependency +// scanning. 
+CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend +//ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE +CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_BUILD_CUBIN +CUDA_BUILD_CUBIN-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_BUILD_EMULATION +CUDA_BUILD_EMULATION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_CUDART_LIBRARY +CUDA_CUDART_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_CUDA_LIBRARY +CUDA_CUDA_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_GENERATED_OUTPUT_DIR +CUDA_GENERATED_OUTPUT_DIR-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_HOST_COMPILATION_CPP +CUDA_HOST_COMPILATION_CPP-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_EXECUTABLE +CUDA_NVCC_EXECUTABLE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS +CUDA_NVCC_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_DEBUG +CUDA_NVCC_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_MINSIZEREL +CUDA_NVCC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELEASE +CUDA_NVCC_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELWITHDEBINFO +CUDA_NVCC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_PROPAGATE_HOST_FLAGS +CUDA_PROPAGATE_HOST_FLAGS-ADVANCED:INTERNAL=1 +//This is the value of the last time CUDA_SDK_ROOT_DIR was set +// successfully. 
+CUDA_SDK_ROOT_DIR_INTERNAL:INTERNAL=CUDA_SDK_ROOT_DIR-NOTFOUND +//ADVANCED property for variable: CUDA_SEPARABLE_COMPILATION +CUDA_SEPARABLE_COMPILATION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_TARGET_CPU_ARCH +CUDA_TARGET_CPU_ARCH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_TOOLKIT_INCLUDE +CUDA_TOOLKIT_INCLUDE-ADVANCED:INTERNAL=1 +//This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was +// set successfully. +CUDA_TOOLKIT_ROOT_DIR_INTERNAL:INTERNAL=/software/cuda-9.1 +//ADVANCED property for variable: CUDA_TOOLKIT_TARGET_DIR +CUDA_TOOLKIT_TARGET_DIR-ADVANCED:INTERNAL=1 +//This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was +// set successfully. +CUDA_TOOLKIT_TARGET_DIR_INTERNAL:INTERNAL=/software/cuda-9.1 +//ADVANCED property for variable: CUDA_VERBOSE_BUILD +CUDA_VERBOSE_BUILD-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_VERSION +CUDA_VERSION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cublas_LIBRARY +CUDA_cublas_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cudart_static_LIBRARY +CUDA_cudart_static_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cufft_LIBRARY +CUDA_cufft_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cupti_LIBRARY +CUDA_cupti_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_curand_LIBRARY +CUDA_curand_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cusolver_LIBRARY +CUDA_cusolver_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cusparse_LIBRARY +CUDA_cusparse_LIBRARY-ADVANCED:INTERNAL=1 +//Location of make2cmake.cmake +CUDA_make2cmake:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/make2cmake.cmake +//ADVANCED property for variable: CUDA_nppc_LIBRARY +CUDA_nppc_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppi_LIBRARY +CUDA_nppi_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_npps_LIBRARY 
+CUDA_npps_LIBRARY-ADVANCED:INTERNAL=1 +//Location of parse_cubin.cmake +CUDA_parse_cubin:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/parse_cubin.cmake +//Location of run_nvcc.cmake +CUDA_run_nvcc:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/run_nvcc.cmake +//Details about finding CUDA +FIND_PACKAGE_MESSAGE_DETAILS_CUDA:INTERNAL=[/software/cuda-9.1][/software/cuda-9.1/bin/nvcc][/software/cuda-9.1/include][/software/cuda-9.1/lib64/libcudart.so][v9.1(6.5)] +//Details about finding Threads +FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()] + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f40522e627a66ddca0a1b7c75b83836d5e12e77a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake @@ -0,0 +1,67 @@ +set(CMAKE_C_COMPILER "/usr/bin/cc") +set(CMAKE_C_COMPILER_ARG1 "") +set(CMAKE_C_COMPILER_ID "GNU") +set(CMAKE_C_COMPILER_VERSION "5.4.0") +set(CMAKE_C_COMPILER_WRAPPER "") +set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "11") +set(CMAKE_C_COMPILE_FEATURES "c_function_prototypes;c_restrict;c_variadic_macros;c_static_assert") +set(CMAKE_C90_COMPILE_FEATURES "c_function_prototypes") +set(CMAKE_C99_COMPILE_FEATURES "c_restrict;c_variadic_macros") +set(CMAKE_C11_COMPILE_FEATURES "c_static_assert") + +set(CMAKE_C_PLATFORM_ID "Linux") +set(CMAKE_C_SIMULATE_ID "") +set(CMAKE_C_SIMULATE_VERSION "") + +set(CMAKE_AR "/usr/bin/ar") +set(CMAKE_RANLIB "/usr/bin/ranlib") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_COMPILER_IS_GNUCC 1) +set(CMAKE_C_COMPILER_LOADED 1) +set(CMAKE_C_COMPILER_WORKS TRUE) +set(CMAKE_C_ABI_COMPILED TRUE) +set(CMAKE_COMPILER_IS_MINGW ) +set(CMAKE_COMPILER_IS_CYGWIN ) +if(CMAKE_COMPILER_IS_CYGWIN) + set(CYGWIN 1) + set(UNIX 1) +endif() + +set(CMAKE_C_COMPILER_ENV_VAR "CC") + +if(CMAKE_COMPILER_IS_MINGW) + set(MINGW 1) +endif() 
+set(CMAKE_C_COMPILER_ID_RUN 1) +set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) +set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. +set(CMAKE_C_SIZEOF_DATA_PTR "8") +set(CMAKE_C_COMPILER_ABI "ELF") +set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_C_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_C_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +endif() + +if(CMAKE_C_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + +set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "c") +set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") +set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..013ee9298fb861e7d0350d49a1fc08c0274b5e59 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake @@ -0,0 +1,68 @@ +set(CMAKE_CXX_COMPILER "/usr/bin/c++") +set(CMAKE_CXX_COMPILER_ARG1 "") +set(CMAKE_CXX_COMPILER_ID "GNU") +set(CMAKE_CXX_COMPILER_VERSION "5.4.0") +set(CMAKE_CXX_COMPILER_WRAPPER "") +set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "98") +set(CMAKE_CXX_COMPILE_FEATURES 
"cxx_template_template_parameters;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") +set(CMAKE_CXX98_COMPILE_FEATURES "cxx_template_template_parameters") +set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX14_COMPILE_FEATURES "cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") + +set(CMAKE_CXX_PLATFORM_ID "Linux") +set(CMAKE_CXX_SIMULATE_ID "") +set(CMAKE_CXX_SIMULATE_VERSION "") + +set(CMAKE_AR "/usr/bin/ar") +set(CMAKE_RANLIB "/usr/bin/ranlib") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_COMPILER_IS_GNUCXX 1) +set(CMAKE_CXX_COMPILER_LOADED 1) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_CXX_ABI_COMPILED TRUE) +set(CMAKE_COMPILER_IS_MINGW ) +set(CMAKE_COMPILER_IS_CYGWIN ) +if(CMAKE_COMPILER_IS_CYGWIN) + set(CYGWIN 1) + set(UNIX 1) +endif() + +set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +if(CMAKE_COMPILER_IS_MINGW) + set(MINGW 1) +endif() +set(CMAKE_CXX_COMPILER_ID_RUN 1) +set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS 
C;M;c++;cc;cpp;cxx;mm;CPP) +set(CMAKE_CXX_LINKER_PREFERENCE 30) +set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) + +# Save compiler ABI information. +set(CMAKE_CXX_SIZEOF_DATA_PTR "8") +set(CMAKE_CXX_COMPILER_ABI "ELF") +set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_CXX_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_CXX_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +endif() + +if(CMAKE_CXX_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + +set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;c") +set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") +set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin new file mode 100755 index 0000000000000000000000000000000000000000..8fadb3a4377be9080de10797e815ab49faade975 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin new file mode 100755 index 0000000000000000000000000000000000000000..f89cba0f5e50283f60499d801dbc6711babc578c Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake new file mode 
100644 index 0000000000000000000000000000000000000000..daf4b4c75bfa1a5e36628a6e7f99442486dce87d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Linux-4.15.0-36-generic") +set(CMAKE_HOST_SYSTEM_NAME "Linux") +set(CMAKE_HOST_SYSTEM_VERSION "4.15.0-36-generic") +set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + + + +set(CMAKE_SYSTEM "Linux-4.15.0-36-generic") +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_VERSION "4.15.0-36-generic") +set(CMAKE_SYSTEM_PROCESSOR "x86_64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c new file mode 100644 index 0000000000000000000000000000000000000000..570a15e994e4f10ca4a05b4451ea350fb942337f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c @@ -0,0 +1,544 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." 
+#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif + /* __INTEL_COMPILER = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 
10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" +# if __SUNPRO_C >= 0x5100 + /* __SUNPRO_C = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# endif + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + /* __HP_cc = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_cc % 100) + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + /* __DECC_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000) + +#elif defined(__IBMC__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800 +# define COMPILER_ID "XL" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__IBMC__) && 
!defined(__COMPILER_VER__) && __IBMC__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version) +# define COMPILER_ID "Fujitsu" + +#elif defined(__TINYC__) +# define COMPILER_ID "TinyCC" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR 
DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" + +#elif defined(__ARMCC_VERSION) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define 
COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(SDCC) +# define COMPILER_ID "SDCC" + /* SDCC = VRP */ +# define COMPILER_VERSION_MAJOR DEC(SDCC/100) +# define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10) +# define COMPILER_VERSION_PATCH DEC(SDCC % 10) + +#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" +# if defined(_SGI_COMPILER_VERSION) + /* _SGI_COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_SGI_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_SGI_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_SGI_COMPILER_VERSION % 10) +# else + /* _COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_COMPILER_VERSION % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXE) || defined(__CRAYXC) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) 
+# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# else /* unknown platform */ +# define PLATFORM_ID "" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID "" +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number components. 
*/ +#ifdef COMPILER_VERSION_MAJOR +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + + +const char* info_language_dialect_default = "INFO" ":" "dialect_default[" +#if !defined(__STDC_VERSION__) + "90" +#elif __STDC_VERSION__ >= 201000L + "11" +#elif __STDC_VERSION__ >= 199901L + "99" +#else +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + require += info_arch[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXE) || defined(__CRAYXC) + require += info_cray[argc]; +#endif + require += info_language_dialect_default[argc]; + (void)argv; + return require; +} +#endif diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out new file mode 100755 index 0000000000000000000000000000000000000000..afc42a94bbc7371ac5a573a3b9eb6b0812ecca21 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out differ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e6d853637c6f7637dd8672b59612a9263a4d0244 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,533 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. 
*/ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + /* __COMO_VERSION__ = VRR */ +# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) +# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif + /* __INTEL_COMPILER = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR 
HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" +# if __SUNPRO_CC >= 0x5100 + /* __SUNPRO_CC = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# endif + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + /* __HP_aCC = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + /* __DECCXX_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000) + +#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && 
!defined(__COMPILER_VER__) && __IBMCPP__ >= 800 +# define COMPILER_ID "XL" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version) +# define COMPILER_ID "Fujitsu" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define 
COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" + +#elif defined(__ARMCC_VERSION) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # 
define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" +# if defined(_SGI_COMPILER_VERSION) + /* _SGI_COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_SGI_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_SGI_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_SGI_COMPILER_VERSION % 10) +# else + /* _COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_COMPILER_VERSION % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXE) || defined(__CRAYXC) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) 
+# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# else /* unknown platform */ +# define PLATFORM_ID "" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID "" +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number components. 
*/ +#ifdef COMPILER_VERSION_MAJOR +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + + +const char* info_language_dialect_default = "INFO" ":" "dialect_default[" +#if __cplusplus >= 201402L + "14" +#elif __cplusplus >= 201103L + "11" +#else + "98" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXE) || defined(__CRAYXC) + require += info_cray[argc]; +#endif + require += info_language_dialect_default[argc]; + (void)argv; + return require; +} diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out new file mode 100755 index 0000000000000000000000000000000000000000..648b86701f00871e7148b5f6fcce25d049005826 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out differ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 0000000000000000000000000000000000000000..289a9a79806014b6aeb5fd1006ec61b9b84ad4f0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,16 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Relative path conversion top directories. 
+set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt") +set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build") + +# Force unix paths in dependencies. +set(CMAKE_FORCE_UNIX_PATHS 1) + + +# The C and CXX include file regular expressions for this directory. +set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log new file mode 100644 index 0000000000000000000000000000000000000000..df25b3106d9fa8bd2bb0165b31ac5a59ed0270cb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log @@ -0,0 +1,55 @@ +Determining if the pthread_create exist failed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_4a9d8/fast" +/usr/bin/make -f CMakeFiles/cmTC_4a9d8.dir/build.make CMakeFiles/cmTC_4a9d8.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o +/usr/bin/cc -fPIC -o CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckSymbolExists.c +Linking C executable cmTC_4a9d8 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4a9d8.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o -o cmTC_4a9d8 -rdynamic +CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o: In function `main': +CheckSymbolExists.c:(.text+0x1b): undefined reference to `pthread_create' +collect2: error: ld returned 1 exit status 
+CMakeFiles/cmTC_4a9d8.dir/build.make:97: recipe for target 'cmTC_4a9d8' failed +make[1]: *** [cmTC_4a9d8] Error 1 +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Makefile:126: recipe for target 'cmTC_4a9d8/fast' failed +make: *** [cmTC_4a9d8/fast] Error 2 + +File /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckSymbolExists.c: +/* */ +#include <pthread.h> + +int main(int argc, char** argv) +{ + (void)argv; +#ifndef pthread_create + return ((int*)(&pthread_create))[argc]; +#else + (void)argc; + return 0; +#endif +} + +Determining if the function pthread_create exists in the pthreads failed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_1413b/fast" +/usr/bin/make -f CMakeFiles/cmTC_1413b.dir/build.make CMakeFiles/cmTC_1413b.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o +/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o -c /usr/share/cmake-3.5/Modules/CheckFunctionExists.c +Linking C executable cmTC_1413b +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_1413b.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o -o cmTC_1413b -rdynamic -lpthreads +/usr/bin/ld: cannot find -lpthreads +collect2: error: ld returned 1 exit status +CMakeFiles/cmTC_1413b.dir/build.make:97: recipe for target 'cmTC_1413b' failed +make[1]: *** [cmTC_1413b] Error 1 +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Makefile:126: recipe for target 'cmTC_1413b/fast' failed +make: *** [cmTC_1413b/fast] Error 2 + + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log new file mode 100644 index 0000000000000000000000000000000000000000..8c55cb7b351b2f10aa1430b64ec906416b12a072 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log @@ -0,0 +1,584 @@ +The system is: Linux - 4.15.0-36-generic - x86_64 +Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. +Compiler: /usr/bin/cc +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" + +The C compiler identification is GNU, found in "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out" + +Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. +Compiler: /usr/bin/c++ +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" + +The CXX compiler identification is GNU, found in "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out" + +Determining if the C compiler works passed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_f2cd6/fast" +/usr/bin/make -f CMakeFiles/cmTC_f2cd6.dir/build.make CMakeFiles/cmTC_f2cd6.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o +/usr/bin/cc -o CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/testCCompiler.c +Linking C executable cmTC_f2cd6 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_f2cd6.dir/link.txt --verbose=1 +/usr/bin/cc 
CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o -o cmTC_f2cd6 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + +Detecting C compiler ABI info compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_2ea18/fast" +/usr/bin/make -f CMakeFiles/cmTC_2ea18.dir/build.make CMakeFiles/cmTC_2ea18.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o +/usr/bin/cc -o CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.5/Modules/CMakeCCompilerABI.c +Linking C executable cmTC_2ea18 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2ea18.dir/link.txt --verbose=1 +/usr/bin/cc -v CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -o cmTC_2ea18 -rdynamic +Using built-in specs. 
+COLLECT_GCC=/usr/bin/cc +COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu +Thread model: posix +gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) +COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/ +LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_2ea18' '-rdynamic' '-mtune=generic' '-march=x86-64' + 
/usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/cc95kyZ3.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_2ea18 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + +Parsed C implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command:"/usr/bin/make" "cmTC_2ea18/fast"] + ignore line: [/usr/bin/make -f CMakeFiles/cmTC_2ea18.dir/build.make CMakeFiles/cmTC_2ea18.dir/build] + ignore line: [make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'] + ignore line: [Building C object CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o] + ignore line: [/usr/bin/cc -o 
CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.5/Modules/CMakeCCompilerABI.c] + ignore line: [Linking C executable cmTC_2ea18] + ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2ea18.dir/link.txt --verbose=1] + ignore line: [/usr/bin/cc -v CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -o cmTC_2ea18 -rdynamic ] + ignore line: [Using built-in specs.] + ignore line: [COLLECT_GCC=/usr/bin/cc] + ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) ] + ignore line: 
[COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_2ea18' '-rdynamic' '-mtune=generic' '-march=x86-64'] + link line: [ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/cc95kyZ3.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_2ea18 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. 
CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/5/collect2] ==> ignore + arg [-plugin] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so] ==> ignore + arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] ==> ignore + arg [-plugin-opt=-fresolution=/tmp/cc95kyZ3.res] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [--sysroot=/] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [--hash-style=gnu] ==> ignore + arg [--as-needed] ==> ignore + arg [-export-dynamic] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-zrelro] ==> ignore + arg [-o] ==> ignore + arg [cmTC_2ea18] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o] ==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] + arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib] ==> dir [/lib/../lib] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib] + 
arg [-L.] ==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] + arg [CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o] ==> ignore + arg [-lgcc] ==> lib [gcc] + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--no-as-needed] ==> ignore + arg [-lc] ==> lib [c] + arg [-lgcc] ==> lib [gcc] + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--no-as-needed] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtend.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] ==> ignore + remove lib [gcc] + remove lib [gcc_s] + remove lib [gcc] + remove lib [gcc_s] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5] ==> [/usr/lib/gcc/x86_64-linux-gnu/5] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> [/usr/lib] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib] ==> [/lib] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/../lib] ==> [/usr/lib] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] 
==> [/usr/lib] + implicit libs: [c] + implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib] + implicit fwks: [] + + + + +Detecting C [-std=c11] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_fe354/fast" +/usr/bin/make -f CMakeFiles/cmTC_fe354.dir/build.make CMakeFiles/cmTC_fe354.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_fe354.dir/feature_tests.c.o +/usr/bin/cc -std=c11 -o CMakeFiles/cmTC_fe354.dir/feature_tests.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c +Linking C executable cmTC_fe354 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_fe354.dir/link.txt --verbose=1 +/usr/bin/cc CMakeFiles/cmTC_fe354.dir/feature_tests.c.o -o cmTC_fe354 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: C_FEATURE:1c_function_prototypes + Feature record: C_FEATURE:1c_restrict + Feature record: C_FEATURE:1c_static_assert + Feature record: C_FEATURE:1c_variadic_macros + + +Detecting C [-std=c99] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_fefa5/fast" +/usr/bin/make -f CMakeFiles/cmTC_fefa5.dir/build.make CMakeFiles/cmTC_fefa5.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o +/usr/bin/cc -std=c99 -o CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c +Linking C 
executable cmTC_fefa5 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_fefa5.dir/link.txt --verbose=1 +/usr/bin/cc CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o -o cmTC_fefa5 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: C_FEATURE:1c_function_prototypes + Feature record: C_FEATURE:1c_restrict + Feature record: C_FEATURE:0c_static_assert + Feature record: C_FEATURE:1c_variadic_macros + + +Detecting C [-std=c90] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_1c31f/fast" +/usr/bin/make -f CMakeFiles/cmTC_1c31f.dir/build.make CMakeFiles/cmTC_1c31f.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o +/usr/bin/cc -std=c90 -o CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c +Linking C executable cmTC_1c31f +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_1c31f.dir/link.txt --verbose=1 +/usr/bin/cc CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o -o cmTC_1c31f -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: C_FEATURE:1c_function_prototypes + Feature record: C_FEATURE:0c_restrict + Feature record: C_FEATURE:0c_static_assert + Feature record: C_FEATURE:0c_variadic_macros +Determining if the CXX compiler works passed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_81321/fast" +/usr/bin/make -f CMakeFiles/cmTC_81321.dir/build.make CMakeFiles/cmTC_81321.dir/build +make[1]: Entering directory 
'/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o +/usr/bin/c++ -o CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/testCXXCompiler.cxx +Linking CXX executable cmTC_81321 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_81321.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o -o cmTC_81321 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + +Detecting CXX compiler ABI info compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_c70a3/fast" +/usr/bin/make -f CMakeFiles/cmTC_c70a3.dir/build.make CMakeFiles/cmTC_c70a3.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o +/usr/bin/c++ -o CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.5/Modules/CMakeCXXCompilerABI.cpp +Linking CXX executable cmTC_c70a3 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c70a3.dir/link.txt --verbose=1 +/usr/bin/c++ -v CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_c70a3 -rdynamic +Using built-in specs. 
+COLLECT_GCC=/usr/bin/c++ +COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu +Thread model: posix +gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) +COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/ +LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c70a3' '-rdynamic' '-shared-libgcc' '-mtune=generic' '-march=x86-64' + 
/usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/ccYtMczG.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_c70a3 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + +Parsed CXX implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command:"/usr/bin/make" "cmTC_c70a3/fast"] + ignore line: [/usr/bin/make -f CMakeFiles/cmTC_c70a3.dir/build.make CMakeFiles/cmTC_c70a3.dir/build] + ignore line: [make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'] + ignore line: [Building CXX object CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o] + ignore line: [/usr/bin/c++ -o 
CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.5/Modules/CMakeCXXCompilerABI.cpp] + ignore line: [Linking CXX executable cmTC_c70a3] + ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c70a3.dir/link.txt --verbose=1] + ignore line: [/usr/bin/c++ -v CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_c70a3 -rdynamic ] + ignore line: [Using built-in specs.] + ignore line: [COLLECT_GCC=/usr/bin/c++] + ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) ] + ignore line: 
[COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c70a3' '-rdynamic' '-shared-libgcc' '-mtune=generic' '-march=x86-64'] + link line: [ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/ccYtMczG.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_c70a3 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. 
CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/5/collect2] ==> ignore + arg [-plugin] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so] ==> ignore + arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] ==> ignore + arg [-plugin-opt=-fresolution=/tmp/ccYtMczG.res] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [--sysroot=/] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [--hash-style=gnu] ==> ignore + arg [--as-needed] ==> ignore + arg [-export-dynamic] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-zrelro] ==> ignore + arg [-o] ==> ignore + arg [cmTC_c70a3] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o] ==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] + arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib] ==> dir [/lib/../lib] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib] + arg [-L.] 
==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] + arg [CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore + arg [-lstdc++] ==> lib [stdc++] + arg [-lm] ==> lib [m] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [-lc] ==> lib [c] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtend.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] ==> ignore + remove lib [gcc_s] + remove lib [gcc] + remove lib [gcc_s] + remove lib [gcc] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5] ==> [/usr/lib/gcc/x86_64-linux-gnu/5] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> [/usr/lib] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib] ==> [/lib] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/../lib] ==> [/usr/lib] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] 
==> [/usr/lib] + implicit libs: [stdc++;m;c] + implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib] + implicit fwks: [] + + + + +Detecting CXX [-std=c++14] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_6a40d/fast" +/usr/bin/make -f CMakeFiles/cmTC_6a40d.dir/build.make CMakeFiles/cmTC_6a40d.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++14 -o CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_6a40d +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_6a40d.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o -o cmTC_6a40d -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:1cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:1cxx_alias_templates + Feature record: CXX_FEATURE:1cxx_alignas + Feature record: CXX_FEATURE:1cxx_alignof + Feature record: CXX_FEATURE:1cxx_attributes + Feature record: CXX_FEATURE:1cxx_attribute_deprecated + Feature record: CXX_FEATURE:1cxx_auto_type + Feature record: CXX_FEATURE:1cxx_binary_literals + Feature record: CXX_FEATURE:1cxx_constexpr + Feature record: CXX_FEATURE:1cxx_contextual_conversions + Feature record: CXX_FEATURE:1cxx_decltype + Feature record: CXX_FEATURE:1cxx_decltype_auto + Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types + Feature record: CXX_FEATURE:1cxx_default_function_template_args + Feature record: CXX_FEATURE:1cxx_defaulted_functions + Feature record: 
CXX_FEATURE:1cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:1cxx_delegating_constructors + Feature record: CXX_FEATURE:1cxx_deleted_functions + Feature record: CXX_FEATURE:1cxx_digit_separators + Feature record: CXX_FEATURE:1cxx_enum_forward_declarations + Feature record: CXX_FEATURE:1cxx_explicit_conversions + Feature record: CXX_FEATURE:1cxx_extended_friend_declarations + Feature record: CXX_FEATURE:1cxx_extern_templates + Feature record: CXX_FEATURE:1cxx_final + Feature record: CXX_FEATURE:1cxx_func_identifier + Feature record: CXX_FEATURE:1cxx_generalized_initializers + Feature record: CXX_FEATURE:1cxx_generic_lambdas + Feature record: CXX_FEATURE:1cxx_inheriting_constructors + Feature record: CXX_FEATURE:1cxx_inline_namespaces + Feature record: CXX_FEATURE:1cxx_lambdas + Feature record: CXX_FEATURE:1cxx_lambda_init_captures + Feature record: CXX_FEATURE:1cxx_local_type_template_args + Feature record: CXX_FEATURE:1cxx_long_long_type + Feature record: CXX_FEATURE:1cxx_noexcept + Feature record: CXX_FEATURE:1cxx_nonstatic_member_init + Feature record: CXX_FEATURE:1cxx_nullptr + Feature record: CXX_FEATURE:1cxx_override + Feature record: CXX_FEATURE:1cxx_range_for + Feature record: CXX_FEATURE:1cxx_raw_string_literals + Feature record: CXX_FEATURE:1cxx_reference_qualified_functions + Feature record: CXX_FEATURE:1cxx_relaxed_constexpr + Feature record: CXX_FEATURE:1cxx_return_type_deduction + Feature record: CXX_FEATURE:1cxx_right_angle_brackets + Feature record: CXX_FEATURE:1cxx_rvalue_references + Feature record: CXX_FEATURE:1cxx_sizeof_member + Feature record: CXX_FEATURE:1cxx_static_assert + Feature record: CXX_FEATURE:1cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:1cxx_thread_local + Feature record: CXX_FEATURE:1cxx_trailing_return_types + Feature record: CXX_FEATURE:1cxx_unicode_literals + Feature record: CXX_FEATURE:1cxx_uniform_initialization + Feature record: 
CXX_FEATURE:1cxx_unrestricted_unions + Feature record: CXX_FEATURE:1cxx_user_literals + Feature record: CXX_FEATURE:1cxx_variable_templates + Feature record: CXX_FEATURE:1cxx_variadic_macros + Feature record: CXX_FEATURE:1cxx_variadic_templates + + +Detecting CXX [-std=c++11] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_b98f2/fast" +/usr/bin/make -f CMakeFiles/cmTC_b98f2.dir/build.make CMakeFiles/cmTC_b98f2.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++11 -o CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_b98f2 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_b98f2.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o -o cmTC_b98f2 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:1cxx_alias_templates + Feature record: CXX_FEATURE:1cxx_alignas + Feature record: CXX_FEATURE:1cxx_alignof + Feature record: CXX_FEATURE:1cxx_attributes + Feature record: CXX_FEATURE:0cxx_attribute_deprecated + Feature record: CXX_FEATURE:1cxx_auto_type + Feature record: CXX_FEATURE:0cxx_binary_literals + Feature record: CXX_FEATURE:1cxx_constexpr + Feature record: CXX_FEATURE:0cxx_contextual_conversions + Feature record: CXX_FEATURE:1cxx_decltype + Feature record: CXX_FEATURE:0cxx_decltype_auto + Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types + Feature record: CXX_FEATURE:1cxx_default_function_template_args + Feature record: 
CXX_FEATURE:1cxx_defaulted_functions + Feature record: CXX_FEATURE:1cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:1cxx_delegating_constructors + Feature record: CXX_FEATURE:1cxx_deleted_functions + Feature record: CXX_FEATURE:0cxx_digit_separators + Feature record: CXX_FEATURE:1cxx_enum_forward_declarations + Feature record: CXX_FEATURE:1cxx_explicit_conversions + Feature record: CXX_FEATURE:1cxx_extended_friend_declarations + Feature record: CXX_FEATURE:1cxx_extern_templates + Feature record: CXX_FEATURE:1cxx_final + Feature record: CXX_FEATURE:1cxx_func_identifier + Feature record: CXX_FEATURE:1cxx_generalized_initializers + Feature record: CXX_FEATURE:0cxx_generic_lambdas + Feature record: CXX_FEATURE:1cxx_inheriting_constructors + Feature record: CXX_FEATURE:1cxx_inline_namespaces + Feature record: CXX_FEATURE:1cxx_lambdas + Feature record: CXX_FEATURE:0cxx_lambda_init_captures + Feature record: CXX_FEATURE:1cxx_local_type_template_args + Feature record: CXX_FEATURE:1cxx_long_long_type + Feature record: CXX_FEATURE:1cxx_noexcept + Feature record: CXX_FEATURE:1cxx_nonstatic_member_init + Feature record: CXX_FEATURE:1cxx_nullptr + Feature record: CXX_FEATURE:1cxx_override + Feature record: CXX_FEATURE:1cxx_range_for + Feature record: CXX_FEATURE:1cxx_raw_string_literals + Feature record: CXX_FEATURE:1cxx_reference_qualified_functions + Feature record: CXX_FEATURE:0cxx_relaxed_constexpr + Feature record: CXX_FEATURE:0cxx_return_type_deduction + Feature record: CXX_FEATURE:1cxx_right_angle_brackets + Feature record: CXX_FEATURE:1cxx_rvalue_references + Feature record: CXX_FEATURE:1cxx_sizeof_member + Feature record: CXX_FEATURE:1cxx_static_assert + Feature record: CXX_FEATURE:1cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:1cxx_thread_local + Feature record: CXX_FEATURE:1cxx_trailing_return_types + Feature record: CXX_FEATURE:1cxx_unicode_literals + Feature record: 
CXX_FEATURE:1cxx_uniform_initialization + Feature record: CXX_FEATURE:1cxx_unrestricted_unions + Feature record: CXX_FEATURE:1cxx_user_literals + Feature record: CXX_FEATURE:0cxx_variable_templates + Feature record: CXX_FEATURE:1cxx_variadic_macros + Feature record: CXX_FEATURE:1cxx_variadic_templates + + +Detecting CXX [-std=c++98] compiler features compiled with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_90018/fast" +/usr/bin/make -f CMakeFiles/cmTC_90018.dir/build.make CMakeFiles/cmTC_90018.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++98 -o CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_90018 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_90018.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o -o cmTC_90018 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:0cxx_alias_templates + Feature record: CXX_FEATURE:0cxx_alignas + Feature record: CXX_FEATURE:0cxx_alignof + Feature record: CXX_FEATURE:0cxx_attributes + Feature record: CXX_FEATURE:0cxx_attribute_deprecated + Feature record: CXX_FEATURE:0cxx_auto_type + Feature record: CXX_FEATURE:0cxx_binary_literals + Feature record: CXX_FEATURE:0cxx_constexpr + Feature record: CXX_FEATURE:0cxx_contextual_conversions + Feature record: CXX_FEATURE:0cxx_decltype + Feature record: CXX_FEATURE:0cxx_decltype_auto + Feature record: CXX_FEATURE:0cxx_decltype_incomplete_return_types + Feature record: 
CXX_FEATURE:0cxx_default_function_template_args + Feature record: CXX_FEATURE:0cxx_defaulted_functions + Feature record: CXX_FEATURE:0cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:0cxx_delegating_constructors + Feature record: CXX_FEATURE:0cxx_deleted_functions + Feature record: CXX_FEATURE:0cxx_digit_separators + Feature record: CXX_FEATURE:0cxx_enum_forward_declarations + Feature record: CXX_FEATURE:0cxx_explicit_conversions + Feature record: CXX_FEATURE:0cxx_extended_friend_declarations + Feature record: CXX_FEATURE:0cxx_extern_templates + Feature record: CXX_FEATURE:0cxx_final + Feature record: CXX_FEATURE:0cxx_func_identifier + Feature record: CXX_FEATURE:0cxx_generalized_initializers + Feature record: CXX_FEATURE:0cxx_generic_lambdas + Feature record: CXX_FEATURE:0cxx_inheriting_constructors + Feature record: CXX_FEATURE:0cxx_inline_namespaces + Feature record: CXX_FEATURE:0cxx_lambdas + Feature record: CXX_FEATURE:0cxx_lambda_init_captures + Feature record: CXX_FEATURE:0cxx_local_type_template_args + Feature record: CXX_FEATURE:0cxx_long_long_type + Feature record: CXX_FEATURE:0cxx_noexcept + Feature record: CXX_FEATURE:0cxx_nonstatic_member_init + Feature record: CXX_FEATURE:0cxx_nullptr + Feature record: CXX_FEATURE:0cxx_override + Feature record: CXX_FEATURE:0cxx_range_for + Feature record: CXX_FEATURE:0cxx_raw_string_literals + Feature record: CXX_FEATURE:0cxx_reference_qualified_functions + Feature record: CXX_FEATURE:0cxx_relaxed_constexpr + Feature record: CXX_FEATURE:0cxx_return_type_deduction + Feature record: CXX_FEATURE:0cxx_right_angle_brackets + Feature record: CXX_FEATURE:0cxx_rvalue_references + Feature record: CXX_FEATURE:0cxx_sizeof_member + Feature record: CXX_FEATURE:0cxx_static_assert + Feature record: CXX_FEATURE:0cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:0cxx_thread_local + Feature record: CXX_FEATURE:0cxx_trailing_return_types + Feature record: 
CXX_FEATURE:0cxx_unicode_literals + Feature record: CXX_FEATURE:0cxx_uniform_initialization + Feature record: CXX_FEATURE:0cxx_unrestricted_unions + Feature record: CXX_FEATURE:0cxx_user_literals + Feature record: CXX_FEATURE:0cxx_variable_templates + Feature record: CXX_FEATURE:0cxx_variadic_macros + Feature record: CXX_FEATURE:0cxx_variadic_templates +Determining if the include file pthread.h exists passed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_4d656/fast" +/usr/bin/make -f CMakeFiles/cmTC_4d656.dir/build.make CMakeFiles/cmTC_4d656.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o +/usr/bin/cc -fPIC -o CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckIncludeFile.c +Linking C executable cmTC_4d656 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4d656.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o -o cmTC_4d656 -rdynamic +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + +Determining if the function pthread_create exists in the pthread passed with the following output: +Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/make" "cmTC_aece3/fast" +/usr/bin/make -f CMakeFiles/cmTC_aece3.dir/build.make CMakeFiles/cmTC_aece3.dir/build +make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o +/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o -c 
/usr/share/cmake-3.5/Modules/CheckFunctionExists.c +Linking C executable cmTC_aece3 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_aece3.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o -o cmTC_aece3 -rdynamic -lpthread +make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp' + + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt new file mode 100644 index 0000000000000000000000000000000000000000..988c14c4be383f9b7bc75c3d6e196b04b99196b8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt @@ -0,0 +1,2 @@ +# Hashes of file build rules. +9de7d50a6da57ec557a0d6ed4f990e8c CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f8cb2571537c410d2267730f9e2dff7d8fb9890f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake @@ -0,0 +1,68 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# The generator used is: +set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") + +# The top level Makefile was generated from the following files: +set(CMAKE_MAKEFILE_DEPENDS + "CMakeCache.txt" + "../CMakeLists.txt" + "CMakeFiles/3.5.1/CMakeCCompiler.cmake" + "CMakeFiles/3.5.1/CMakeCXXCompiler.cmake" + "CMakeFiles/3.5.1/CMakeSystem.cmake" + "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend" + "/usr/share/cmake-3.5/Modules/CMakeCInformation.cmake" + "/usr/share/cmake-3.5/Modules/CMakeCXXInformation.cmake" + "/usr/share/cmake-3.5/Modules/CMakeCommonLanguageInclude.cmake" + "/usr/share/cmake-3.5/Modules/CMakeGenericSystem.cmake" + "/usr/share/cmake-3.5/Modules/CMakeLanguageInformation.cmake" + "/usr/share/cmake-3.5/Modules/CMakeParseArguments.cmake" + "/usr/share/cmake-3.5/Modules/CMakeSystemSpecificInformation.cmake" + "/usr/share/cmake-3.5/Modules/CMakeSystemSpecificInitialize.cmake" + "/usr/share/cmake-3.5/Modules/CheckIncludeFile.cmake" + "/usr/share/cmake-3.5/Modules/CheckLibraryExists.cmake" + "/usr/share/cmake-3.5/Modules/CheckSymbolExists.cmake" + "/usr/share/cmake-3.5/Modules/Compiler/GNU-C.cmake" + "/usr/share/cmake-3.5/Modules/Compiler/GNU-CXX.cmake" + "/usr/share/cmake-3.5/Modules/Compiler/GNU.cmake" + "/usr/share/cmake-3.5/Modules/FindCUDA.cmake" + "/usr/share/cmake-3.5/Modules/FindCUDA/run_nvcc.cmake" + "/usr/share/cmake-3.5/Modules/FindPackageHandleStandardArgs.cmake" + "/usr/share/cmake-3.5/Modules/FindPackageMessage.cmake" + "/usr/share/cmake-3.5/Modules/FindThreads.cmake" + "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU-C.cmake" + "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU-CXX.cmake" + "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU.cmake" + "/usr/share/cmake-3.5/Modules/Platform/Linux.cmake" + "/usr/share/cmake-3.5/Modules/Platform/UnixPaths.cmake" + ) + +# The corresponding makefile is: +set(CMAKE_MAKEFILE_OUTPUTS + "Makefile" + 
"CMakeFiles/cmake.check_cache" + ) + +# Byproducts of CMake generate step: +set(CMAKE_MAKEFILE_PRODUCTS + "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake" + "CMakeFiles/CMakeDirectoryInformation.cmake" + ) + +# Dependency information for all targets: +set(CMAKE_DEPEND_INFO_FILES + "CMakeFiles/lenet_keras_half.dir/DependInfo.cmake" + "CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake" + "CMakeFiles/fc4_half.dir/DependInfo.cmake" + "CMakeFiles/fc3_clipped.dir/DependInfo.cmake" + "CMakeFiles/fc2_clipped.dir/DependInfo.cmake" + "CMakeFiles/test_ops.dir/DependInfo.cmake" + "CMakeFiles/fc3_half.dir/DependInfo.cmake" + "CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + "CMakeFiles/fc4_clipped.dir/DependInfo.cmake" + "CMakeFiles/cifar_keras.dir/DependInfo.cmake" + "CMakeFiles/lenet_keras.dir/DependInfo.cmake" + "CMakeFiles/lenet_tanh.dir/DependInfo.cmake" + "CMakeFiles/fc2_half.dir/DependInfo.cmake" + ) diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2 b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2 new file mode 100644 index 0000000000000000000000000000000000000000..0b344bc3c2932e509eae114809b8b641c1aeb1c5 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2 @@ -0,0 +1,552 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# The main recursive all target +all: + +.PHONY : all + +# The main recursive preinstall target +preinstall: + +.PHONY : preinstall + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. 
+SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +#============================================================================= +# Target rules for target CMakeFiles/lenet_keras_half.dir + +# All Build rule for target. +CMakeFiles/lenet_keras_half.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/depend + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=17,18 "Built target lenet_keras_half" +.PHONY : CMakeFiles/lenet_keras_half.dir/all + +# Include target in all. +all: CMakeFiles/lenet_keras_half.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. 
+CMakeFiles/lenet_keras_half.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_keras_half.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/lenet_keras_half.dir/rule + +# Convenience name for target. +lenet_keras_half: CMakeFiles/lenet_keras_half.dir/rule + +.PHONY : lenet_keras_half + +# clean rule for target. +CMakeFiles/lenet_keras_half.dir/clean: + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/clean +.PHONY : CMakeFiles/lenet_keras_half.dir/clean + +# clean rule for target. +clean: CMakeFiles/lenet_keras_half.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/lenet_tanh_half.dir + +# All Build rule for target. +CMakeFiles/lenet_tanh_half.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/depend + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=21,22 "Built target lenet_tanh_half" +.PHONY : CMakeFiles/lenet_tanh_half.dir/all + +# Include target in all. +all: CMakeFiles/lenet_tanh_half.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. 
+CMakeFiles/lenet_tanh_half.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_tanh_half.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/lenet_tanh_half.dir/rule + +# Convenience name for target. +lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/rule + +.PHONY : lenet_tanh_half + +# clean rule for target. +CMakeFiles/lenet_tanh_half.dir/clean: + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/clean +.PHONY : CMakeFiles/lenet_tanh_half.dir/clean + +# clean rule for target. +clean: CMakeFiles/lenet_tanh_half.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc4_half.dir + +# All Build rule for target. +CMakeFiles/fc4_half.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/depend + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=13,14 "Built target fc4_half" +.PHONY : CMakeFiles/fc4_half.dir/all + +# Include target in all. +all: CMakeFiles/fc4_half.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/fc4_half.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc4_half.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc4_half.dir/rule + +# Convenience name for target. 
+fc4_half: CMakeFiles/fc4_half.dir/rule + +.PHONY : fc4_half + +# clean rule for target. +CMakeFiles/fc4_half.dir/clean: + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/clean +.PHONY : CMakeFiles/fc4_half.dir/clean + +# clean rule for target. +clean: CMakeFiles/fc4_half.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc3_clipped.dir + +# All Build rule for target. +CMakeFiles/fc3_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/depend + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=7,8 "Built target fc3_clipped" +.PHONY : CMakeFiles/fc3_clipped.dir/all + +# Include target in all. +all: CMakeFiles/fc3_clipped.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/fc3_clipped.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc3_clipped.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc3_clipped.dir/rule + +# Convenience name for target. +fc3_clipped: CMakeFiles/fc3_clipped.dir/rule + +.PHONY : fc3_clipped + +# clean rule for target. +CMakeFiles/fc3_clipped.dir/clean: + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/clean +.PHONY : CMakeFiles/fc3_clipped.dir/clean + +# clean rule for target. 
+clean: CMakeFiles/fc3_clipped.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc2_clipped.dir + +# All Build rule for target. +CMakeFiles/fc2_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/depend + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=3,4 "Built target fc2_clipped" +.PHONY : CMakeFiles/fc2_clipped.dir/all + +# Include target in all. +all: CMakeFiles/fc2_clipped.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/fc2_clipped.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc2_clipped.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc2_clipped.dir/rule + +# Convenience name for target. +fc2_clipped: CMakeFiles/fc2_clipped.dir/rule + +.PHONY : fc2_clipped + +# clean rule for target. +CMakeFiles/fc2_clipped.dir/clean: + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/clean +.PHONY : CMakeFiles/fc2_clipped.dir/clean + +# clean rule for target. +clean: CMakeFiles/fc2_clipped.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/test_ops.dir + +# All Build rule for target. 
+CMakeFiles/test_ops.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/depend + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=25,26 "Built target test_ops" +.PHONY : CMakeFiles/test_ops.dir/all + +# Include target in all. +all: CMakeFiles/test_ops.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/test_ops.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/test_ops.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/test_ops.dir/rule + +# Convenience name for target. +test_ops: CMakeFiles/test_ops.dir/rule + +.PHONY : test_ops + +# clean rule for target. +CMakeFiles/test_ops.dir/clean: + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/clean +.PHONY : CMakeFiles/test_ops.dir/clean + +# clean rule for target. +clean: CMakeFiles/test_ops.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc3_half.dir + +# All Build rule for target. +CMakeFiles/fc3_half.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/depend + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=9,10 "Built target fc3_half" +.PHONY : CMakeFiles/fc3_half.dir/all + +# Include target in all. 
+all: CMakeFiles/fc3_half.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/fc3_half.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc3_half.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc3_half.dir/rule + +# Convenience name for target. +fc3_half: CMakeFiles/fc3_half.dir/rule + +.PHONY : fc3_half + +# clean rule for target. +CMakeFiles/fc3_half.dir/clean: + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/clean +.PHONY : CMakeFiles/fc3_half.dir/clean + +# clean rule for target. +clean: CMakeFiles/fc3_half.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/tensor_runtime.dir + +# All Build rule for target. +CMakeFiles/tensor_runtime.dir/all: + $(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/depend + $(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=23,24 "Built target tensor_runtime" +.PHONY : CMakeFiles/tensor_runtime.dir/all + +# Include target in all. +all: CMakeFiles/tensor_runtime.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. 
+CMakeFiles/tensor_runtime.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 2 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/tensor_runtime.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/tensor_runtime.dir/rule + +# Convenience name for target. +tensor_runtime: CMakeFiles/tensor_runtime.dir/rule + +.PHONY : tensor_runtime + +# clean rule for target. +CMakeFiles/tensor_runtime.dir/clean: + $(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/clean +.PHONY : CMakeFiles/tensor_runtime.dir/clean + +# clean rule for target. +clean: CMakeFiles/tensor_runtime.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc4_clipped.dir + +# All Build rule for target. +CMakeFiles/fc4_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/depend + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=11,12 "Built target fc4_clipped" +.PHONY : CMakeFiles/fc4_clipped.dir/all + +# Include target in all. +all: CMakeFiles/fc4_clipped.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. 
+CMakeFiles/fc4_clipped.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc4_clipped.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc4_clipped.dir/rule + +# Convenience name for target. +fc4_clipped: CMakeFiles/fc4_clipped.dir/rule + +.PHONY : fc4_clipped + +# clean rule for target. +CMakeFiles/fc4_clipped.dir/clean: + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/clean +.PHONY : CMakeFiles/fc4_clipped.dir/clean + +# clean rule for target. +clean: CMakeFiles/fc4_clipped.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/cifar_keras.dir + +# All Build rule for target. +CMakeFiles/cifar_keras.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/depend + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=1,2 "Built target cifar_keras" +.PHONY : CMakeFiles/cifar_keras.dir/all + +# Include target in all. +all: CMakeFiles/cifar_keras.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/cifar_keras.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/cifar_keras.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/cifar_keras.dir/rule + +# Convenience name for target. 
+cifar_keras: CMakeFiles/cifar_keras.dir/rule + +.PHONY : cifar_keras + +# clean rule for target. +CMakeFiles/cifar_keras.dir/clean: + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/clean +.PHONY : CMakeFiles/cifar_keras.dir/clean + +# clean rule for target. +clean: CMakeFiles/cifar_keras.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/lenet_keras.dir + +# All Build rule for target. +CMakeFiles/lenet_keras.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/depend + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=15,16 "Built target lenet_keras" +.PHONY : CMakeFiles/lenet_keras.dir/all + +# Include target in all. +all: CMakeFiles/lenet_keras.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/lenet_keras.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_keras.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/lenet_keras.dir/rule + +# Convenience name for target. +lenet_keras: CMakeFiles/lenet_keras.dir/rule + +.PHONY : lenet_keras + +# clean rule for target. +CMakeFiles/lenet_keras.dir/clean: + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/clean +.PHONY : CMakeFiles/lenet_keras.dir/clean + +# clean rule for target. 
+clean: CMakeFiles/lenet_keras.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/lenet_tanh.dir + +# All Build rule for target. +CMakeFiles/lenet_tanh.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/depend + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=19,20 "Built target lenet_tanh" +.PHONY : CMakeFiles/lenet_tanh.dir/all + +# Include target in all. +all: CMakeFiles/lenet_tanh.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/lenet_tanh.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_tanh.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/lenet_tanh.dir/rule + +# Convenience name for target. +lenet_tanh: CMakeFiles/lenet_tanh.dir/rule + +.PHONY : lenet_tanh + +# clean rule for target. +CMakeFiles/lenet_tanh.dir/clean: + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/clean +.PHONY : CMakeFiles/lenet_tanh.dir/clean + +# clean rule for target. +clean: CMakeFiles/lenet_tanh.dir/clean + +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/fc2_half.dir + +# All Build rule for target. 
+CMakeFiles/fc2_half.dir/all: CMakeFiles/tensor_runtime.dir/all + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/depend + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=5,6 "Built target fc2_half" +.PHONY : CMakeFiles/fc2_half.dir/all + +# Include target in all. +all: CMakeFiles/fc2_half.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/fc2_half.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc2_half.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : CMakeFiles/fc2_half.dir/rule + +# Convenience name for target. +fc2_half: CMakeFiles/fc2_half.dir/rule + +.PHONY : fc2_half + +# clean rule for target. +CMakeFiles/fc2_half.dir/clean: + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/clean +.PHONY : CMakeFiles/fc2_half.dir/clean + +# clean rule for target. +clean: CMakeFiles/fc2_half.dir/clean + +.PHONY : clean + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. 
+cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt new file mode 100644 index 0000000000000000000000000000000000000000..7aa182268196538906366b5fc5667cd3cb92bc7f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt @@ -0,0 +1,15 @@ +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/edit_cache.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/rebuild_cache.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..78a78817e8c337630492f9d074f99130d5e5b442 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache @@ -0,0 +1,610 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h 
+/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h 
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h 
+vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + 
+/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp 
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp 
+/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + 
+/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..4b42d430b83dd89ba4b4356776e1dca9a75d3d6e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + 
"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..2e4378ebfbc17bb851037c2b1eb2ca03b3955b57 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. 
+SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/cifar_keras.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/cifar_keras.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/cifar_keras.dir/flags.make + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: CMakeFiles/cifar_keras.dir/flags.make +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/src/cifar_keras.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc > CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s: cmake_force + 
@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc -o CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires: + +.PHONY : CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides.build +.PHONY : CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides.build: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o + + +# Object files for target cifar_keras +cifar_keras_OBJECTS = \ +"CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o" + +# External object files for target cifar_keras +cifar_keras_EXTERNAL_OBJECTS = + +cifar_keras: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o +cifar_keras: CMakeFiles/cifar_keras.dir/build.make +cifar_keras: libtensor_runtime.a +cifar_keras: /software/cuda-9.1/lib64/libcudart_static.a +cifar_keras: /usr/lib/x86_64-linux-gnu/librt.so +cifar_keras: /software/cuda-9.1/lib64/libcublas.so +cifar_keras: CMakeFiles/cifar_keras.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable cifar_keras" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/cifar_keras.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/cifar_keras.dir/build: cifar_keras + +.PHONY : CMakeFiles/cifar_keras.dir/build + +CMakeFiles/cifar_keras.dir/requires: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires + +.PHONY : CMakeFiles/cifar_keras.dir/requires + +CMakeFiles/cifar_keras.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/cifar_keras.dir/cmake_clean.cmake +.PHONY : CMakeFiles/cifar_keras.dir/clean + +CMakeFiles/cifar_keras.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/cifar_keras.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..335c9fb2bc422090fd58f558df55be4ff0e2292d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o" + "cifar_keras.pdb" + "cifar_keras" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/cifar_keras.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..a83f9ca39892005e9714e9baa2535debb2237046 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..c69b29f90158618bf7eb4d5afc2d16b3e4265644 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/include/types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/src/cifar_keras.cc +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.h 
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: 
/software/cuda-9.1/include/cudnn.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_functions.hpp 
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..95dc6ca79382618bd80eeb65c600f9daa166ec63 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o -o cifar_keras -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..abadeb0c3abaa81d622026fcd3ae096d03dd29b7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make @@ -0,0 +1,3 @@ 
+CMAKE_PROGRESS_1 = 1 +CMAKE_PROGRESS_2 = 2 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache new file mode 100644 index 0000000000000000000000000000000000000000..3dccd731726d7faa8b29d8d7dba3b981a53ca497 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..8acab7ba46b391f5dc7af10b96f417aebfd080f4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + 
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h 
+/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + 
+/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h 
+/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + 
+/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + 
+/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp 
+builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff 
--git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..290629a09dcc44628fd8bfbb815ea84749126e12 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..5e0004f1cb85f419ab45ee449afb52e309e621cb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. 
+.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc2_clipped.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc2_clipped.dir/progress.make + +# Include the compile flags for this target's objects. 
+include CMakeFiles/fc2_clipped.dir/flags.make + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: CMakeFiles/fc2_clipped.dir/flags.make +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/src/fc2_clipped.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc > CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc -o CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires: + +.PHONY : CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make 
CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides.build +.PHONY : CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides.build: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o + + +# Object files for target fc2_clipped +fc2_clipped_OBJECTS = \ +"CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o" + +# External object files for target fc2_clipped +fc2_clipped_EXTERNAL_OBJECTS = + +fc2_clipped: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o +fc2_clipped: CMakeFiles/fc2_clipped.dir/build.make +fc2_clipped: libtensor_runtime.a +fc2_clipped: /software/cuda-9.1/lib64/libcudart_static.a +fc2_clipped: /usr/lib/x86_64-linux-gnu/librt.so +fc2_clipped: /software/cuda-9.1/lib64/libcublas.so +fc2_clipped: CMakeFiles/fc2_clipped.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc2_clipped" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc2_clipped.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/fc2_clipped.dir/build: fc2_clipped + +.PHONY : CMakeFiles/fc2_clipped.dir/build + +CMakeFiles/fc2_clipped.dir/requires: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires + +.PHONY : CMakeFiles/fc2_clipped.dir/requires + +CMakeFiles/fc2_clipped.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc2_clipped.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc2_clipped.dir/clean + +CMakeFiles/fc2_clipped.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc2_clipped.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0140a9a42193824cd671307074eed19164b868ea --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o" + "fc2_clipped.pdb" + "fc2_clipped" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc2_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..ccedd50d251316aeca0ad00cd47dc5c7c98ae5cc --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..3fd2cbb0b5d8936cc61934011ea513010a21ceea --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/src/fc2_clipped.cc +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h 
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: 
/software/cuda-9.1/include/cudnn.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp 
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ca553fcc5572e41ec679ffa141b786f8b066f61 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o -o fc2_clipped -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..8c8fb6fbbc138d8387b9ed9bdb2088ee8aa036f6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make @@ -0,0 +1,3 @@ 
+CMAKE_PROGRESS_1 = 3 +CMAKE_PROGRESS_2 = 4 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..2ad6351efadfcce11eb2338cf4e5e6dd68c3e9c4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp 
+/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- 
+crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h 
+/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + 
+/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + 
+/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..fc3896454326f0206ad33fa86b2df38571a21ba6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + 
"CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..53ce420327310bca8328607c4c687f45f408840a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. 
+cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc2_half.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc2_half.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/fc2_half.dir/flags.make + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: CMakeFiles/fc2_half.dir/flags.make +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/src/half/fc2_half.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc > CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc -o CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires: + +.PHONY : CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides.build +.PHONY : CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides.build: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o + + +# Object files for target fc2_half +fc2_half_OBJECTS = \ +"CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o" + +# External object files for target fc2_half +fc2_half_EXTERNAL_OBJECTS = + +fc2_half: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o +fc2_half: CMakeFiles/fc2_half.dir/build.make +fc2_half: libtensor_runtime.a +fc2_half: /software/cuda-9.1/lib64/libcudart_static.a +fc2_half: /usr/lib/x86_64-linux-gnu/librt.so +fc2_half: /software/cuda-9.1/lib64/libcublas.so +fc2_half: CMakeFiles/fc2_half.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc2_half" 
+ $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc2_half.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/fc2_half.dir/build: fc2_half + +.PHONY : CMakeFiles/fc2_half.dir/build + +CMakeFiles/fc2_half.dir/requires: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires + +.PHONY : CMakeFiles/fc2_half.dir/requires + +CMakeFiles/fc2_half.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc2_half.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc2_half.dir/clean + +CMakeFiles/fc2_half.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc2_half.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e15f368c9c73447a9ff859216be3579b8c6a9f98 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o" + "fc2_half.pdb" + "fc2_half" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc2_half.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..63d11755b775dbef7f1245dd9a92ad5472531cbd --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..622ac63922f473f23429e608be0a62e3681c8abc --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/src/half/fc2_half.cc +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: 
/software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: 
/software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..533b66238e91aa134146a53bc1452498c9a383e1 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o -o fc2_half -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..3a86673aa7c1868ad77aa16c631effd83be0da02 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 5 +CMAKE_PROGRESS_2 = 6 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..2ee46f5d1f428a09733312af4fa825a1d5cd40bc --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h 
+/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h 
+device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp 
+builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h 
+/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + 
+/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e1e4dd53ee209b075c9e378f3b9bc5f66f7b84a4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + 
"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..372d86fc4b40de8fb2c6940a06dbfa77de6e0cb3 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. 
+SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc3_clipped.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc3_clipped.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/fc3_clipped.dir/flags.make + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: CMakeFiles/fc3_clipped.dir/flags.make +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/src/fc3_clipped.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc > CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s: cmake_force + 
@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc -o CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires: + +.PHONY : CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides.build +.PHONY : CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides.build: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o + + +# Object files for target fc3_clipped +fc3_clipped_OBJECTS = \ +"CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o" + +# External object files for target fc3_clipped +fc3_clipped_EXTERNAL_OBJECTS = + +fc3_clipped: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o +fc3_clipped: CMakeFiles/fc3_clipped.dir/build.make +fc3_clipped: libtensor_runtime.a +fc3_clipped: /software/cuda-9.1/lib64/libcudart_static.a +fc3_clipped: /usr/lib/x86_64-linux-gnu/librt.so +fc3_clipped: /software/cuda-9.1/lib64/libcublas.so +fc3_clipped: CMakeFiles/fc3_clipped.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc3_clipped" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc3_clipped.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/fc3_clipped.dir/build: fc3_clipped + +.PHONY : CMakeFiles/fc3_clipped.dir/build + +CMakeFiles/fc3_clipped.dir/requires: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires + +.PHONY : CMakeFiles/fc3_clipped.dir/requires + +CMakeFiles/fc3_clipped.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc3_clipped.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc3_clipped.dir/clean + +CMakeFiles/fc3_clipped.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc3_clipped.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c10c79fc608515b6cf396999d266ac0b3c40df26 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o" + "fc3_clipped.pdb" + "fc3_clipped" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc3_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..a132ed403d0cfacf109e55775a1e709ac8828c4e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..33de24568667c0f88d3d89d9b695fab3d9dea392 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/src/fc3_clipped.cc +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h 
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: 
/software/cuda-9.1/include/cudnn.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp 
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..34dd27fdc47e7fa2268711dcf46ae7a35cd85036 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o -o fc3_clipped -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..72bb7dd025afc5824222cbd3a1e64841afc2792c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make @@ -0,0 +1,3 @@ 
+CMAKE_PROGRESS_1 = 7 +CMAKE_PROGRESS_2 = 8 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..7a9a368338463bf553ce05065113b374a2f46d48 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp 
+/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- 
+crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h 
+/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + 
+/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + 
+/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ef40691589e0ec87c0c22644f5067e71aaab39a0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + 
"CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..0d90cb857ca1fb3396452948fe968740703c0ec2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. 
+cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc3_half.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc3_half.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/fc3_half.dir/flags.make + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: CMakeFiles/fc3_half.dir/flags.make +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/src/half/fc3_half.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc > CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc -o CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires: + +.PHONY : CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides.build +.PHONY : CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides.build: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o + + +# Object files for target fc3_half +fc3_half_OBJECTS = \ +"CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o" + +# External object files for target fc3_half +fc3_half_EXTERNAL_OBJECTS = + +fc3_half: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o +fc3_half: CMakeFiles/fc3_half.dir/build.make +fc3_half: libtensor_runtime.a +fc3_half: /software/cuda-9.1/lib64/libcudart_static.a +fc3_half: /usr/lib/x86_64-linux-gnu/librt.so +fc3_half: /software/cuda-9.1/lib64/libcublas.so +fc3_half: CMakeFiles/fc3_half.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc3_half" 
+ $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc3_half.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/fc3_half.dir/build: fc3_half + +.PHONY : CMakeFiles/fc3_half.dir/build + +CMakeFiles/fc3_half.dir/requires: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires + +.PHONY : CMakeFiles/fc3_half.dir/requires + +CMakeFiles/fc3_half.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc3_half.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc3_half.dir/clean + +CMakeFiles/fc3_half.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc3_half.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..75f16f0d1fa72bf2fbc24aad3d50a670cb021c75 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o" + "fc3_half.pdb" + "fc3_half" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc3_half.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..a5d2a293e7c97aceec9ba6bc45bc3ee86134af41 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..ca2c2eb82316f95d1114b69d950002a4bdc02ef6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/src/half/fc3_half.cc +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: 
/software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: 
/software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..d71d52f75e878f64d1c257720b3266e17d1f1334 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o -o fc3_half -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..b700c2c902219d74619014853aade0d7ec177030 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 9 +CMAKE_PROGRESS_2 = 10 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..7992fbb40f70cb903029f05b75e0bf882ac7fa3d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h 
+/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h 
+device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp 
+builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h 
+/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + 
+/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..6c82d46875329badd24c671d5698a3c366073b44 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + 
"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..cede9a32472e36ebb8373dee6826c507f3e8821a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. 
+SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc4_clipped.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc4_clipped.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/fc4_clipped.dir/flags.make + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: CMakeFiles/fc4_clipped.dir/flags.make +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/src/fc4_clipped.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc > CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s: cmake_force + 
@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc -o CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires: + +.PHONY : CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides.build +.PHONY : CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides.build: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o + + +# Object files for target fc4_clipped +fc4_clipped_OBJECTS = \ +"CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o" + +# External object files for target fc4_clipped +fc4_clipped_EXTERNAL_OBJECTS = + +fc4_clipped: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o +fc4_clipped: CMakeFiles/fc4_clipped.dir/build.make +fc4_clipped: libtensor_runtime.a +fc4_clipped: /software/cuda-9.1/lib64/libcudart_static.a +fc4_clipped: /usr/lib/x86_64-linux-gnu/librt.so +fc4_clipped: /software/cuda-9.1/lib64/libcublas.so +fc4_clipped: CMakeFiles/fc4_clipped.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc4_clipped" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc4_clipped.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/fc4_clipped.dir/build: fc4_clipped + +.PHONY : CMakeFiles/fc4_clipped.dir/build + +CMakeFiles/fc4_clipped.dir/requires: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires + +.PHONY : CMakeFiles/fc4_clipped.dir/requires + +CMakeFiles/fc4_clipped.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc4_clipped.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc4_clipped.dir/clean + +CMakeFiles/fc4_clipped.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc4_clipped.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8666a8e8cb28664c8bc811043d55de5f02507a4d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o" + "fc4_clipped.pdb" + "fc4_clipped" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc4_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..2c11361bc9c37ce3054af9fabe256bbcfadf998a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..6ca5e15a2cec9d3b3f5d77a99255f4ced3910340 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/src/fc4_clipped.cc +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h 
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: 
/software/cuda-9.1/include/cudnn.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp 
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..a705566a567718c9ed1ba2ccb7368f5c980663f7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o -o fc4_clipped -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..596289c0fd56aa23623cdcbea086a133035b3bc8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make @@ -0,0 +1,3 @@ 
+CMAKE_PROGRESS_1 = 11 +CMAKE_PROGRESS_2 = 12 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..433a511749074e96972c1e6985f9b0dc8b9365e1 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp 
+/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- 
+crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h 
+/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + 
+/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + 
+/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..edbcd0fead66f2c20df42fbc7507ab4e3fa495e7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + 
"CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..63a3f0b76f125f34757ee9e9737dea769b0be782 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. 
+cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/fc4_half.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/fc4_half.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/fc4_half.dir/flags.make + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: CMakeFiles/fc4_half.dir/flags.make +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/src/half/fc4_half.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc > CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc -o CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires: + +.PHONY : CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides.build +.PHONY : CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides.build: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o + + +# Object files for target fc4_half +fc4_half_OBJECTS = \ +"CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o" + +# External object files for target fc4_half +fc4_half_EXTERNAL_OBJECTS = + +fc4_half: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o +fc4_half: CMakeFiles/fc4_half.dir/build.make +fc4_half: libtensor_runtime.a +fc4_half: /software/cuda-9.1/lib64/libcudart_static.a +fc4_half: /usr/lib/x86_64-linux-gnu/librt.so +fc4_half: /software/cuda-9.1/lib64/libcublas.so +fc4_half: CMakeFiles/fc4_half.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc4_half" 
+ $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc4_half.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/fc4_half.dir/build: fc4_half + +.PHONY : CMakeFiles/fc4_half.dir/build + +CMakeFiles/fc4_half.dir/requires: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires + +.PHONY : CMakeFiles/fc4_half.dir/requires + +CMakeFiles/fc4_half.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/fc4_half.dir/cmake_clean.cmake +.PHONY : CMakeFiles/fc4_half.dir/clean + +CMakeFiles/fc4_half.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/fc4_half.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..3d5fa79831540e884599717684998178268aeca2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o" + "fc4_half.pdb" + "fc4_half" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/fc4_half.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..150e2b93982ed4781675774d28c09970064e6b1a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..3584a546c0aeab001413df6299b417b58c3423cd --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/include/types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/src/half/fc4_half.cc +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: 
/software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: 
/software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..8738e81d5ca4717506e51748de1a06e1bdf1c033 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o -o fc4_half -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..d92f75a2935ea31751e4e3d62297a6a1c131fb4d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 13 +CMAKE_PROGRESS_2 = 14 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c440e3f3fecefb9cee9c3151cbf31e26cbe7575 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c new file mode 100644 index 0000000000000000000000000000000000000000..6590dded2342f3eebd9b81505327e84a488580e6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c @@ -0,0 +1,34 @@ + + const char features[] = {"\n" +"C_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 +"1" +#else +"0" +#endif +"c_function_prototypes\n" +"C_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +"1" +#else +"0" +#endif +"c_restrict\n" +"C_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201000L +"1" +#else +"0" +#endif +"c_static_assert\n" +"C_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +"1" +#else +"0" +#endif +"c_variadic_macros\n" + +}; + +int main(int argc, char** argv) { (void)argv; return features[argc]; } diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx new file mode 100644 index 0000000000000000000000000000000000000000..b93418c6ed69feaf1b5c2feb9592bbdb5a5f042c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx @@ -0,0 +1,405 @@ + + const char features[] = {"\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_aggregate_default_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif 
+"cxx_alias_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_alignas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_alignof\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_attributes\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_attribute_deprecated\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_auto_type\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_binary_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_constexpr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_contextual_conversions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_decltype\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_decltype_auto\n" +"CXX_FEATURE:" +#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_decltype_incomplete_return_types\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif 
+"cxx_default_function_template_args\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_defaulted_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_defaulted_move_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_delegating_constructors\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_deleted_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_digit_separators\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_enum_forward_declarations\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_explicit_conversions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_extended_friend_declarations\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_extern_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_final\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || 
(defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_func_identifier\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_generalized_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_generic_lambdas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_inheriting_constructors\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_inline_namespaces\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_lambdas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_lambda_init_captures\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_local_type_template_args\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_long_long_type\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_noexcept\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_nonstatic_member_init\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + 
__GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_nullptr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_override\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_range_for\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_raw_string_literals\n" +"CXX_FEATURE:" +#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_reference_qualified_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_relaxed_constexpr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_return_type_deduction\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_right_angle_brackets\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_rvalue_references\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_sizeof_member\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" 
+#endif +"cxx_static_assert\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_strong_enums\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && __cplusplus +"1" +#else +"0" +#endif +"cxx_template_template_parameters\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_thread_local\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_trailing_return_types\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_unicode_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_uniform_initialization\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_unrestricted_unions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_user_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_variable_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_variadic_macros\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && 
__GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_variadic_templates\n" + +}; + +int main(int argc, char** argv) { (void)argv; return features[argc]; } diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..7b1682a10ad5f3207e7f8e392d79e53e7f78d7d0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache @@ -0,0 +1,610 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp 
+/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- 
+crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h 
+/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + 
+/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + 
+/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..6c61fef38fa9a233cd858f66f49f624b17f9519c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: 
+set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..0cec553b8789fe646c1ec825842e45410f44ea7c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. 
+cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/lenet_keras.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/lenet_keras.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/lenet_keras.dir/flags.make + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: CMakeFiles/lenet_keras.dir/flags.make +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/src/lenet_keras.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc > CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc -o CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires: + +.PHONY : CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides.build +.PHONY : CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides.build: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o + + +# Object files for target lenet_keras +lenet_keras_OBJECTS = \ +"CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o" + +# External object files for target lenet_keras +lenet_keras_EXTERNAL_OBJECTS = + +lenet_keras: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o +lenet_keras: CMakeFiles/lenet_keras.dir/build.make +lenet_keras: libtensor_runtime.a +lenet_keras: /software/cuda-9.1/lib64/libcudart_static.a +lenet_keras: /usr/lib/x86_64-linux-gnu/librt.so +lenet_keras: /software/cuda-9.1/lib64/libcublas.so +lenet_keras: CMakeFiles/lenet_keras.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 
--progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_keras" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_keras.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/lenet_keras.dir/build: lenet_keras + +.PHONY : CMakeFiles/lenet_keras.dir/build + +CMakeFiles/lenet_keras.dir/requires: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires + +.PHONY : CMakeFiles/lenet_keras.dir/requires + +CMakeFiles/lenet_keras.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/lenet_keras.dir/cmake_clean.cmake +.PHONY : CMakeFiles/lenet_keras.dir/clean + +CMakeFiles/lenet_keras.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/lenet_keras.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..dbf02b7144e460c621ec6fe1215b3cbe2d49d427 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o" + "lenet_keras.pdb" + "lenet_keras" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/lenet_keras.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..ea2a7799d8be85ef1fdde83046f5b3db84550822 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..933879b479118d71c7b3b2e3bd2dc1d1d4594f5b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/include/types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/src/lenet_keras.cc +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.h 
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: 
/software/cuda-9.1/include/cudnn.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_functions.hpp 
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9e1e839dd5a8a40521da465cb989b3033cc4678 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o -o lenet_keras -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..a35c33b98d59108c8111c49cbe919dcc1205b601 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make @@ -0,0 +1,3 @@ 
+CMAKE_PROGRESS_1 = 15 +CMAKE_PROGRESS_2 = 16 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..c9299a69419463ebab3ce01de5794334a0e58e06 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache @@ -0,0 +1,610 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h 
+/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h 
+/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp 
+ +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h 
+driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + 
+/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2130e8a8dc6efdf2d4a62716de3e5a34a8ac999a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + 
"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..47c160d10423c81d89aec417717f4ce06482ce77 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. 
+ +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/lenet_keras_half.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/lenet_keras_half.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/lenet_keras_half.dir/flags.make + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: CMakeFiles/lenet_keras_half.dir/flags.make +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/src/half/lenet_keras_half.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc > CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc -o CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires: + +.PHONY : CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides.build +.PHONY : CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides.build: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o + + +# Object files for target lenet_keras_half +lenet_keras_half_OBJECTS = \ +"CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o" + +# External object files for target lenet_keras_half +lenet_keras_half_EXTERNAL_OBJECTS = + +lenet_keras_half: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o +lenet_keras_half: CMakeFiles/lenet_keras_half.dir/build.make +lenet_keras_half: libtensor_runtime.a +lenet_keras_half: /software/cuda-9.1/lib64/libcudart_static.a +lenet_keras_half: 
/usr/lib/x86_64-linux-gnu/librt.so +lenet_keras_half: /software/cuda-9.1/lib64/libcublas.so +lenet_keras_half: CMakeFiles/lenet_keras_half.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_keras_half" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_keras_half.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/lenet_keras_half.dir/build: lenet_keras_half + +.PHONY : CMakeFiles/lenet_keras_half.dir/build + +CMakeFiles/lenet_keras_half.dir/requires: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires + +.PHONY : CMakeFiles/lenet_keras_half.dir/requires + +CMakeFiles/lenet_keras_half.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake +.PHONY : CMakeFiles/lenet_keras_half.dir/clean + +CMakeFiles/lenet_keras_half.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/lenet_keras_half.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..cbc020c708683fc107968ba8e2f6ab25474f8677 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ 
+file(REMOVE_RECURSE + "CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o" + "lenet_keras_half.pdb" + "lenet_keras_half" +) + +# Per-language clean rules from dependency scanning. +foreach(lang CXX) + include(CMakeFiles/lenet_keras_half.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..a5c2ac445903e24e66c542c02888a6ecd34637e4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + 
/software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + /software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + 
/software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + /software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..1332a21846db60cdff5b441cf8fd85f9be1ddadf --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/include/types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/src/half/lenet_keras_half.cc +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: 
/software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: 
/software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/library_types.h 
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: 
/software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..45906ee60f9080f3b3e4e4c93c4c358428d6fcf9 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o -o lenet_keras_half -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..5a7451db601a4e0b85fc5c33a9eb78c105375e1a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 17 +CMAKE_PROGRESS_2 = 18 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..76459ed35867fdd839c190e488d01189aabb99c0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache @@ -0,0 +1,610 @@ +#IncludeRegexLine: ^[ ]*#[ 
]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + 
+/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h 
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h 
+/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h 
+device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + 
+/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h 
+host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2fb9d9dbac8efa5bb6ee705c20a86cd7c4df7f78 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. 
+set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..b63a7f4e90100d741c3a43e777aae900f5195459 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. 
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/lenet_tanh.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/lenet_tanh.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/lenet_tanh.dir/flags.make + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: CMakeFiles/lenet_tanh.dir/flags.make +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/src/lenet2_tanh.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc > CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S 
/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc -o CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires: + +.PHONY : CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides.build +.PHONY : CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides.build: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o + + +# Object files for target lenet_tanh +lenet_tanh_OBJECTS = \ +"CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o" + +# External object files for target lenet_tanh +lenet_tanh_EXTERNAL_OBJECTS = + +lenet_tanh: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o +lenet_tanh: CMakeFiles/lenet_tanh.dir/build.make +lenet_tanh: libtensor_runtime.a +lenet_tanh: /software/cuda-9.1/lib64/libcudart_static.a +lenet_tanh: /usr/lib/x86_64-linux-gnu/librt.so +lenet_tanh: /software/cuda-9.1/lib64/libcublas.so +lenet_tanh: CMakeFiles/lenet_tanh.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_tanh" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_tanh.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/lenet_tanh.dir/build: lenet_tanh + +.PHONY : CMakeFiles/lenet_tanh.dir/build + +CMakeFiles/lenet_tanh.dir/requires: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires + +.PHONY : CMakeFiles/lenet_tanh.dir/requires + +CMakeFiles/lenet_tanh.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/lenet_tanh.dir/cmake_clean.cmake +.PHONY : CMakeFiles/lenet_tanh.dir/clean + +CMakeFiles/lenet_tanh.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/lenet_tanh.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..3d74e65e4e66864ade9b49f5d3871b6ba2a56e4b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o" + "lenet_tanh.pdb" + "lenet_tanh" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/lenet_tanh.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..fa863a217eb1530606b1efc1465d6050895a91fc --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..fea2b140184a4956e7c69e01ddb2d0e7d030f406 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/include/types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/src/lenet2_tanh.cc +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: 
/software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: 
/software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: 
/software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..dabfee65c443e3a0947c391e2473f83382b0ba9c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o -o lenet_tanh -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..48b3d8a54961adb9cc4e043cbf8aaaf7484fc44a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 19 +CMAKE_PROGRESS_2 = 20 + diff --git 
a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..cf64a9de7d16584dbd1e81f31a1f0269a61e013b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache @@ -0,0 +1,612 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc +stdio.h +- +stdlib.h +- +unistd.h +- +fcntl.h +- +sys/types.h +- +sys/stat.h +- +string.h +- +../../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +../../include/types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h 
+driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h +crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp 
+device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h +/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp 
+/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h 
+cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h 
+texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + +/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h 
+builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h 
+/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + +/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h 
+/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c536e7042bb98ab6b384bc7d7008138d0a17257a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit 
dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. +set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..6ffdff84dd27efacb392adca22206d652d4fd24f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. 
+cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/lenet_tanh_half.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/lenet_tanh_half.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/lenet_tanh_half.dir/flags.make + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: CMakeFiles/lenet_tanh_half.dir/flags.make +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/src/half/lenet_tanh_half.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to 
CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc > CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc -o CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires: + +.PHONY : CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides.build +.PHONY : CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides.build: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o + + +# Object files for target lenet_tanh_half +lenet_tanh_half_OBJECTS = \ +"CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o" + +# External object files for target lenet_tanh_half +lenet_tanh_half_EXTERNAL_OBJECTS = + +lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o +lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/build.make +lenet_tanh_half: libtensor_runtime.a 
+lenet_tanh_half: /software/cuda-9.1/lib64/libcudart_static.a +lenet_tanh_half: /usr/lib/x86_64-linux-gnu/librt.so +lenet_tanh_half: /software/cuda-9.1/lib64/libcublas.so +lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_tanh_half" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_tanh_half.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/lenet_tanh_half.dir/build: lenet_tanh_half + +.PHONY : CMakeFiles/lenet_tanh_half.dir/build + +CMakeFiles/lenet_tanh_half.dir/requires: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires + +.PHONY : CMakeFiles/lenet_tanh_half.dir/requires + +CMakeFiles/lenet_tanh_half.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake +.PHONY : CMakeFiles/lenet_tanh_half.dir/clean + +CMakeFiles/lenet_tanh_half.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/lenet_tanh_half.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8fbccf20b6033cb705183eeb2c8cea49fdd01ee4 --- /dev/null +++ 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o" + "lenet_tanh_half.pdb" + "lenet_tanh_half" +) + +# Per-language clean rules from dependency scanning. +foreach(lang CXX) + include(CMakeFiles/lenet_tanh_half.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..84f058181b48c0ce1160ba79694d636ea646e099 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + 
/software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + /software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + 
/software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + /software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..8ef1bdbab7082da00db91de78a2b02751a2b83d5 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/include/types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/src/half/lenet_tanh_half.cc +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/common_functions.h 
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h 
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: 
/software/cuda-9.1/include/library_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: 
/software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..accf8f622cae61034eec5579364fa19a717877bd --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o -o lenet_tanh_half -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..6ec2abf9db4adc26734f3497c2ff4710a130a0ae --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 21 +CMAKE_PROGRESS_2 = 22 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks new file mode 100644 index 0000000000000000000000000000000000000000..6f4247a6255c99f420d1df558d68745592862ff7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks @@ -0,0 +1 @@ +26 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..19fab2149bf120962a1699d74b7373348dc4c117 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake @@ -0,0 +1,11 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + ) +# The set of files for implicit dependencies of each language: + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..851ed18eae0d54d5604bb55a0d3a9e72fd9fdedc --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make @@ -0,0 +1,465 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. 
+RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/tensor_runtime.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/tensor_runtime.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/tensor_runtime.dir/flags.make + +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../dnn_sources/include/types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/debug.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/error.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_conversion.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_emu.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_gemm.cu +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/global_data.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/half_precision_api.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/op_overheads.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
../tensor_runtime/include/profiling.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor_utils.cu +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/src/tensor_runtime.cu +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_surface_types.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_discrete.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_discrete2.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_globals.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_kernel.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_lognormal.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mrg32k3a.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mtgp32.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mtgp32_kernel.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_normal.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_normal_static.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_philox4x32_x.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_poisson.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_precalc.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_uniform.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_atomic_functions.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/_G_config.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/alloca.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/asm-generic/errno-base.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/asm-generic/errno.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/assert.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/algorithm +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/array +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/backward/auto_ptr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/backward/binders.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/algorithmfwd.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/alloc_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/allocated_ptr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/allocator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/atomic_base.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/atomic_lockfree_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_ios.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_ios.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_string.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_string.tcc 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/char_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/codecvt.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/concept_check.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/cpp_type_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/cxxabi_forced.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/exception_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/exception_ptr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/functexcept.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/functional_hash.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hash_bytes.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hashtable.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hashtable_policy.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ios_base.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/istream.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/c++/5/bits/locale_classes.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_classes.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_conv.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets_nonio.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets_nonio.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/localefwd.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/memoryfwd.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/move.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/nested_exception.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ostream.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ostream_insert.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/parse_numbers.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/postypes.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/predefined_ops.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ptr_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/random.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/random.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/range_access.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr_atomic.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr_base.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/sstream.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_algo.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_algobase.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_bvector.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_construct.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_function.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/c++/5/bits/stl_heap.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator_base_funcs.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator_base_types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_map.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_multimap.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_numeric.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_pair.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_raw_storage_iter.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_relops.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_tempbuf.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_tree.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_uninitialized.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_vector.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/streambuf.tcc 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/streambuf_iterator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stringfwd.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/uniform_int_dist.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/unique_ptr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/unordered_map.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/uses_allocator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/vector.tcc +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cctype +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cerrno +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cfloat +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/chrono +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/clocale +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cmath +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdint +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdio 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdlib +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstring +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ctime +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cwchar +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cwctype +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/debug/debug.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/exception +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/aligned_buffer.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/alloc_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/atomicity.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/concurrence.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/new_allocator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/numeric_traits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/string_conversions.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/type_traits.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/functional +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/initializer_list +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iomanip +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ios +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iosfwd +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iostream +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/istream +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/limits +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/locale +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/map +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/memory +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/new +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/numeric +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ostream +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/random +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ratio 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/sstream +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/stdexcept +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/streambuf +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/string +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/system_error +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/tuple +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/type_traits +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/typeinfo +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/unordered_map +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/utility +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/vector +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/ctype.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/endian.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/errno.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/features.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/libintl.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/libio.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/limits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/linux/errno.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/linux/limits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/locale.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/math.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/memory.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/pthread.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/sched.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdc-predef.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdint.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdio.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdlib.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/string.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/time.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/wchar.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/wctype.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/asm/errno.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/byteswap-16.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/byteswap.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/endian.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/errno.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_val.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_valf.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_vall.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/inf.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/local_lim.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/locale.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/math-vector.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/x86_64-linux-gnu/bits/mathcalls.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/mathdef.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/mathinline.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/nan.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/posix1_lim.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/posix2_lim.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sched.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/select.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/select2.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/setjmp.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sigset.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdio.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdio2.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/x86_64-linux-gnu/bits/stdio_lim.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib-float.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/string3.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sys_errlist.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/time.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/timex.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/typesizes.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/waitflags.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/waitstatus.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/wchar.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/wchar2.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/include/x86_64-linux-gnu/bits/wordsize.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/xopen_lim.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/atomic_word.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++allocator.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++config.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++locale.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/cpu_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/ctype_base.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/ctype_inline.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/error_constants.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/gthr-default.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/gthr.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/messages_members.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/opt_random.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/os_defines.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/time_members.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/gnu/stubs-64.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/gnu/stubs.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/cdefs.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/select.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/sysmacros.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/types.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/xlocale.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/limits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/syslimits.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/adxintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/ammintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512bwintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512cdintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512dqintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512erintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmaintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmavlintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512pfintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmiintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmivlintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlbwintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vldqintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: 
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/bmi2intrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/bmiintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/clflushoptintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/clwbintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/emmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/f16cintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/float.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fma4intrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fxsrintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/ia32intrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/lwpintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/lzcntintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mm3dnow.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mm_malloc.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mwaitxintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/pcommitintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/pmmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/popcntintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/prfchwintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/rdseedintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/rtmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/shaintrin.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/smmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stdarg.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stddef.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stdint.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/tbmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/tmmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/wmmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/x86intrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xmmintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xopintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsavecintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveoptintrin.h 
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsavesintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xtestintrin.h +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake +CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/src/tensor_runtime.cu + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o" + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src && /usr/bin/cmake -E make_directory /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/. 
+ cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o -D generated_cubin_file:STRING=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o.cubin.txt -P /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake + +# Object files for target tensor_runtime +tensor_runtime_OBJECTS = + +# External object files for target tensor_runtime +tensor_runtime_EXTERNAL_OBJECTS = \ +"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o" + +libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o +libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/build.make +libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX static library libtensor_runtime.a" + $(CMAKE_COMMAND) -P CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/tensor_runtime.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/tensor_runtime.dir/build: libtensor_runtime.a + +.PHONY : CMakeFiles/tensor_runtime.dir/build + +CMakeFiles/tensor_runtime.dir/requires: + +.PHONY : CMakeFiles/tensor_runtime.dir/requires + +CMakeFiles/tensor_runtime.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/tensor_runtime.dir/cmake_clean.cmake +.PHONY : CMakeFiles/tensor_runtime.dir/clean + +CMakeFiles/tensor_runtime.dir/depend: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/tensor_runtime.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0fc2d57e88004858ee8b99a2b2209fb14f727016 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o" + "libtensor_runtime.pdb" + "libtensor_runtime.a" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang ) + include(CMakeFiles/tensor_runtime.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake new file mode 100644 index 0000000000000000000000000000000000000000..25929ee744ec9578314eeb618bd5dd37c2f609cb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake @@ -0,0 +1,3 @@ +file(REMOVE_RECURSE + "libtensor_runtime.a" +) diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal @@ -0,0 +1,3 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make @@ -0,0 +1,3 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make @@ -0,0 +1,3 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ad396e0e26887f38340329a79418b5335e3a585 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt @@ -0,0 +1,2 @@ +/usr/bin/ar qc libtensor_runtime.a CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o +/usr/bin/ranlib libtensor_runtime.a diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..6c29f4fb5e35d30c7f60537a3bc9a6d7192f84b9 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 23 +CMAKE_PROGRESS_2 = 24 + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake new file mode 100644 index 0000000000000000000000000000000000000000..549795b7cc0257a54b9db3f3858c17dc65bc19f6 --- /dev/null +++ 
b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake @@ -0,0 +1,294 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. 
+ +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. + +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu") # path +set(NVCC_generated_dependency_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.5/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.5/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/.") # path +set(generated_file_internal "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o") # path +set(generated_cubin_file_internal "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/software/cuda-9.1/bin/nvcc") # path +set(CUDA_NVCC_FLAGS -gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-O3;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand ;; ) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64;-DNO_INJECTION) # list +set(CUDA_NVCC_INCLUDE_ARGS "-I/software/cuda-9.1/include;-I/software/cuda-9.1/lib64;-I/software/cuda-9.1/lib64/include;-I/software/cuda-9.1/include") # list (needs to be in quotes to handle spaces properly). +set(format_flag "-c") # string +set(cuda_language_flag ) # list + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. +set(CMAKE_HOST_FLAGS ) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. 
+if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "$(VCInstallDir)bin" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. +# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. 
(command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, excape them, so they come through. + string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 9.1) +if(CUDA_VERSION VERSION_LESS "3.0") + cmake_policy(PUSH) + # CMake policy 0007 NEW states that empty list elements are not + # ignored. I'm just setting it to avoid the warning that's printed. + cmake_policy(SET CMP0007 NEW) + # Note that this will remove all occurances of -G. + list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") + cmake_policy(POP) +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invokation. We will go ahead and manually +# define this for now until a future version fixes this bug. 
+set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. +# CMake will pass the quotes through and not be able to find the file. +cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + 
${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. + cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend new file mode 100644 index 0000000000000000000000000000000000000000..2f1e45707d14f4e9ff7f521aa571ff826d7173eb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend @@ -0,0 +1,373 @@ +# Generated by: make2cmake.cmake +SET(CUDA_NVCC_DEPEND + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h" + 
"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu" + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu" + "/software/cuda-9.1/include/builtin_types.h" + "/software/cuda-9.1/include/channel_descriptor.h" + "/software/cuda-9.1/include/common_functions.h" + "/software/cuda-9.1/include/crt/common_functions.h" + "/software/cuda-9.1/include/crt/device_double_functions.h" + "/software/cuda-9.1/include/crt/device_double_functions.hpp" + "/software/cuda-9.1/include/crt/device_functions.h" + "/software/cuda-9.1/include/crt/device_functions.hpp" + "/software/cuda-9.1/include/crt/host_config.h" + "/software/cuda-9.1/include/crt/host_defines.h" + "/software/cuda-9.1/include/crt/math_functions.h" + "/software/cuda-9.1/include/crt/math_functions.hpp" + "/software/cuda-9.1/include/crt/sm_70_rt.h" + "/software/cuda-9.1/include/crt/sm_70_rt.hpp" + "/software/cuda-9.1/include/cuComplex.h" + "/software/cuda-9.1/include/cublas_api.h" + "/software/cuda-9.1/include/cublas_v2.h" + "/software/cuda-9.1/include/cuda.h" + 
"/software/cuda-9.1/include/cuda_device_runtime_api.h" + "/software/cuda-9.1/include/cuda_fp16.h" + "/software/cuda-9.1/include/cuda_fp16.hpp" + "/software/cuda-9.1/include/cuda_runtime.h" + "/software/cuda-9.1/include/cuda_runtime_api.h" + "/software/cuda-9.1/include/cuda_surface_types.h" + "/software/cuda-9.1/include/cuda_texture_types.h" + "/software/cuda-9.1/include/cudnn.h" + "/software/cuda-9.1/include/curand.h" + "/software/cuda-9.1/include/curand_discrete.h" + "/software/cuda-9.1/include/curand_discrete2.h" + "/software/cuda-9.1/include/curand_globals.h" + "/software/cuda-9.1/include/curand_kernel.h" + "/software/cuda-9.1/include/curand_lognormal.h" + "/software/cuda-9.1/include/curand_mrg32k3a.h" + "/software/cuda-9.1/include/curand_mtgp32.h" + "/software/cuda-9.1/include/curand_mtgp32_kernel.h" + "/software/cuda-9.1/include/curand_normal.h" + "/software/cuda-9.1/include/curand_normal_static.h" + "/software/cuda-9.1/include/curand_philox4x32_x.h" + "/software/cuda-9.1/include/curand_poisson.h" + "/software/cuda-9.1/include/curand_precalc.h" + "/software/cuda-9.1/include/curand_uniform.h" + "/software/cuda-9.1/include/device_atomic_functions.h" + "/software/cuda-9.1/include/device_atomic_functions.hpp" + "/software/cuda-9.1/include/device_functions.h" + "/software/cuda-9.1/include/device_launch_parameters.h" + "/software/cuda-9.1/include/device_types.h" + "/software/cuda-9.1/include/driver_functions.h" + "/software/cuda-9.1/include/driver_types.h" + "/software/cuda-9.1/include/host_config.h" + "/software/cuda-9.1/include/host_defines.h" + "/software/cuda-9.1/include/library_types.h" + "/software/cuda-9.1/include/sm_20_atomic_functions.h" + "/software/cuda-9.1/include/sm_20_atomic_functions.hpp" + "/software/cuda-9.1/include/sm_20_intrinsics.h" + "/software/cuda-9.1/include/sm_20_intrinsics.hpp" + "/software/cuda-9.1/include/sm_30_intrinsics.h" + "/software/cuda-9.1/include/sm_30_intrinsics.hpp" + "/software/cuda-9.1/include/sm_32_atomic_functions.h" + 
"/software/cuda-9.1/include/sm_32_atomic_functions.hpp" + "/software/cuda-9.1/include/sm_32_intrinsics.h" + "/software/cuda-9.1/include/sm_32_intrinsics.hpp" + "/software/cuda-9.1/include/sm_35_atomic_functions.h" + "/software/cuda-9.1/include/sm_35_intrinsics.h" + "/software/cuda-9.1/include/sm_60_atomic_functions.h" + "/software/cuda-9.1/include/sm_60_atomic_functions.hpp" + "/software/cuda-9.1/include/sm_61_intrinsics.h" + "/software/cuda-9.1/include/sm_61_intrinsics.hpp" + "/software/cuda-9.1/include/surface_functions.h" + "/software/cuda-9.1/include/surface_indirect_functions.h" + "/software/cuda-9.1/include/surface_types.h" + "/software/cuda-9.1/include/texture_fetch_functions.h" + "/software/cuda-9.1/include/texture_indirect_functions.h" + "/software/cuda-9.1/include/texture_types.h" + "/software/cuda-9.1/include/vector_functions.h" + "/software/cuda-9.1/include/vector_functions.hpp" + "/software/cuda-9.1/include/vector_types.h" + "/usr/include/_G_config.h" + "/usr/include/alloca.h" + "/usr/include/asm-generic/errno-base.h" + "/usr/include/asm-generic/errno.h" + "/usr/include/assert.h" + "/usr/include/c++/5/algorithm" + "/usr/include/c++/5/array" + "/usr/include/c++/5/backward/auto_ptr.h" + "/usr/include/c++/5/backward/binders.h" + "/usr/include/c++/5/bits/algorithmfwd.h" + "/usr/include/c++/5/bits/alloc_traits.h" + "/usr/include/c++/5/bits/allocated_ptr.h" + "/usr/include/c++/5/bits/allocator.h" + "/usr/include/c++/5/bits/atomic_base.h" + "/usr/include/c++/5/bits/atomic_lockfree_defines.h" + "/usr/include/c++/5/bits/basic_ios.h" + "/usr/include/c++/5/bits/basic_ios.tcc" + "/usr/include/c++/5/bits/basic_string.h" + "/usr/include/c++/5/bits/basic_string.tcc" + "/usr/include/c++/5/bits/char_traits.h" + "/usr/include/c++/5/bits/codecvt.h" + "/usr/include/c++/5/bits/concept_check.h" + "/usr/include/c++/5/bits/cpp_type_traits.h" + "/usr/include/c++/5/bits/cxxabi_forced.h" + "/usr/include/c++/5/bits/exception_defines.h" + "/usr/include/c++/5/bits/exception_ptr.h" 
+ "/usr/include/c++/5/bits/functexcept.h" + "/usr/include/c++/5/bits/functional_hash.h" + "/usr/include/c++/5/bits/hash_bytes.h" + "/usr/include/c++/5/bits/hashtable.h" + "/usr/include/c++/5/bits/hashtable_policy.h" + "/usr/include/c++/5/bits/ios_base.h" + "/usr/include/c++/5/bits/istream.tcc" + "/usr/include/c++/5/bits/locale_classes.h" + "/usr/include/c++/5/bits/locale_classes.tcc" + "/usr/include/c++/5/bits/locale_conv.h" + "/usr/include/c++/5/bits/locale_facets.h" + "/usr/include/c++/5/bits/locale_facets.tcc" + "/usr/include/c++/5/bits/locale_facets_nonio.h" + "/usr/include/c++/5/bits/locale_facets_nonio.tcc" + "/usr/include/c++/5/bits/localefwd.h" + "/usr/include/c++/5/bits/memoryfwd.h" + "/usr/include/c++/5/bits/move.h" + "/usr/include/c++/5/bits/nested_exception.h" + "/usr/include/c++/5/bits/ostream.tcc" + "/usr/include/c++/5/bits/ostream_insert.h" + "/usr/include/c++/5/bits/parse_numbers.h" + "/usr/include/c++/5/bits/postypes.h" + "/usr/include/c++/5/bits/predefined_ops.h" + "/usr/include/c++/5/bits/ptr_traits.h" + "/usr/include/c++/5/bits/random.h" + "/usr/include/c++/5/bits/random.tcc" + "/usr/include/c++/5/bits/range_access.h" + "/usr/include/c++/5/bits/shared_ptr.h" + "/usr/include/c++/5/bits/shared_ptr_atomic.h" + "/usr/include/c++/5/bits/shared_ptr_base.h" + "/usr/include/c++/5/bits/sstream.tcc" + "/usr/include/c++/5/bits/stl_algo.h" + "/usr/include/c++/5/bits/stl_algobase.h" + "/usr/include/c++/5/bits/stl_bvector.h" + "/usr/include/c++/5/bits/stl_construct.h" + "/usr/include/c++/5/bits/stl_function.h" + "/usr/include/c++/5/bits/stl_heap.h" + "/usr/include/c++/5/bits/stl_iterator.h" + "/usr/include/c++/5/bits/stl_iterator_base_funcs.h" + "/usr/include/c++/5/bits/stl_iterator_base_types.h" + "/usr/include/c++/5/bits/stl_map.h" + "/usr/include/c++/5/bits/stl_multimap.h" + "/usr/include/c++/5/bits/stl_numeric.h" + "/usr/include/c++/5/bits/stl_pair.h" + "/usr/include/c++/5/bits/stl_raw_storage_iter.h" + "/usr/include/c++/5/bits/stl_relops.h" + 
"/usr/include/c++/5/bits/stl_tempbuf.h" + "/usr/include/c++/5/bits/stl_tree.h" + "/usr/include/c++/5/bits/stl_uninitialized.h" + "/usr/include/c++/5/bits/stl_vector.h" + "/usr/include/c++/5/bits/streambuf.tcc" + "/usr/include/c++/5/bits/streambuf_iterator.h" + "/usr/include/c++/5/bits/stringfwd.h" + "/usr/include/c++/5/bits/uniform_int_dist.h" + "/usr/include/c++/5/bits/unique_ptr.h" + "/usr/include/c++/5/bits/unordered_map.h" + "/usr/include/c++/5/bits/uses_allocator.h" + "/usr/include/c++/5/bits/vector.tcc" + "/usr/include/c++/5/cctype" + "/usr/include/c++/5/cerrno" + "/usr/include/c++/5/cfloat" + "/usr/include/c++/5/chrono" + "/usr/include/c++/5/clocale" + "/usr/include/c++/5/cmath" + "/usr/include/c++/5/cstdint" + "/usr/include/c++/5/cstdio" + "/usr/include/c++/5/cstdlib" + "/usr/include/c++/5/cstring" + "/usr/include/c++/5/ctime" + "/usr/include/c++/5/cwchar" + "/usr/include/c++/5/cwctype" + "/usr/include/c++/5/debug/debug.h" + "/usr/include/c++/5/exception" + "/usr/include/c++/5/ext/aligned_buffer.h" + "/usr/include/c++/5/ext/alloc_traits.h" + "/usr/include/c++/5/ext/atomicity.h" + "/usr/include/c++/5/ext/concurrence.h" + "/usr/include/c++/5/ext/new_allocator.h" + "/usr/include/c++/5/ext/numeric_traits.h" + "/usr/include/c++/5/ext/string_conversions.h" + "/usr/include/c++/5/ext/type_traits.h" + "/usr/include/c++/5/functional" + "/usr/include/c++/5/initializer_list" + "/usr/include/c++/5/iomanip" + "/usr/include/c++/5/ios" + "/usr/include/c++/5/iosfwd" + "/usr/include/c++/5/iostream" + "/usr/include/c++/5/istream" + "/usr/include/c++/5/limits" + "/usr/include/c++/5/locale" + "/usr/include/c++/5/map" + "/usr/include/c++/5/memory" + "/usr/include/c++/5/new" + "/usr/include/c++/5/numeric" + "/usr/include/c++/5/ostream" + "/usr/include/c++/5/random" + "/usr/include/c++/5/ratio" + "/usr/include/c++/5/sstream" + "/usr/include/c++/5/stdexcept" + "/usr/include/c++/5/streambuf" + "/usr/include/c++/5/string" + "/usr/include/c++/5/system_error" + 
"/usr/include/c++/5/tuple" + "/usr/include/c++/5/type_traits" + "/usr/include/c++/5/typeinfo" + "/usr/include/c++/5/unordered_map" + "/usr/include/c++/5/utility" + "/usr/include/c++/5/vector" + "/usr/include/ctype.h" + "/usr/include/endian.h" + "/usr/include/errno.h" + "/usr/include/features.h" + "/usr/include/libintl.h" + "/usr/include/libio.h" + "/usr/include/limits.h" + "/usr/include/linux/errno.h" + "/usr/include/linux/limits.h" + "/usr/include/locale.h" + "/usr/include/math.h" + "/usr/include/memory.h" + "/usr/include/pthread.h" + "/usr/include/sched.h" + "/usr/include/stdc-predef.h" + "/usr/include/stdint.h" + "/usr/include/stdio.h" + "/usr/include/stdlib.h" + "/usr/include/string.h" + "/usr/include/time.h" + "/usr/include/wchar.h" + "/usr/include/wctype.h" + "/usr/include/x86_64-linux-gnu/asm/errno.h" + "/usr/include/x86_64-linux-gnu/bits/byteswap-16.h" + "/usr/include/x86_64-linux-gnu/bits/byteswap.h" + "/usr/include/x86_64-linux-gnu/bits/endian.h" + "/usr/include/x86_64-linux-gnu/bits/errno.h" + "/usr/include/x86_64-linux-gnu/bits/huge_val.h" + "/usr/include/x86_64-linux-gnu/bits/huge_valf.h" + "/usr/include/x86_64-linux-gnu/bits/huge_vall.h" + "/usr/include/x86_64-linux-gnu/bits/inf.h" + "/usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h" + "/usr/include/x86_64-linux-gnu/bits/local_lim.h" + "/usr/include/x86_64-linux-gnu/bits/locale.h" + "/usr/include/x86_64-linux-gnu/bits/math-vector.h" + "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" + "/usr/include/x86_64-linux-gnu/bits/mathdef.h" + "/usr/include/x86_64-linux-gnu/bits/mathinline.h" + "/usr/include/x86_64-linux-gnu/bits/nan.h" + "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" + "/usr/include/x86_64-linux-gnu/bits/posix2_lim.h" + "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h" + "/usr/include/x86_64-linux-gnu/bits/sched.h" + "/usr/include/x86_64-linux-gnu/bits/select.h" + "/usr/include/x86_64-linux-gnu/bits/select2.h" + "/usr/include/x86_64-linux-gnu/bits/setjmp.h" + 
"/usr/include/x86_64-linux-gnu/bits/sigset.h" + "/usr/include/x86_64-linux-gnu/bits/stdio.h" + "/usr/include/x86_64-linux-gnu/bits/stdio2.h" + "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" + "/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h" + "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h" + "/usr/include/x86_64-linux-gnu/bits/stdlib.h" + "/usr/include/x86_64-linux-gnu/bits/string3.h" + "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h" + "/usr/include/x86_64-linux-gnu/bits/time.h" + "/usr/include/x86_64-linux-gnu/bits/timex.h" + "/usr/include/x86_64-linux-gnu/bits/types.h" + "/usr/include/x86_64-linux-gnu/bits/typesizes.h" + "/usr/include/x86_64-linux-gnu/bits/waitflags.h" + "/usr/include/x86_64-linux-gnu/bits/waitstatus.h" + "/usr/include/x86_64-linux-gnu/bits/wchar.h" + "/usr/include/x86_64-linux-gnu/bits/wchar2.h" + "/usr/include/x86_64-linux-gnu/bits/wordsize.h" + "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/atomic_word.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/c++allocator.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/c++config.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/c++locale.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/cpu_defines.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/ctype_base.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/ctype_inline.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/error_constants.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/gthr-default.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/gthr.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/messages_members.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/opt_random.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/os_defines.h" + "/usr/include/x86_64-linux-gnu/c++/5/bits/time_members.h" + "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h" + "/usr/include/x86_64-linux-gnu/gnu/stubs.h" + "/usr/include/x86_64-linux-gnu/sys/cdefs.h" + "/usr/include/x86_64-linux-gnu/sys/select.h" + 
"/usr/include/x86_64-linux-gnu/sys/sysmacros.h" + "/usr/include/x86_64-linux-gnu/sys/types.h" + "/usr/include/xlocale.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/limits.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/syslimits.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/adxintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/ammintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512bwintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512cdintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512dqintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512erintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmaintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmavlintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512pfintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmiintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmivlintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlbwintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vldqintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/bmi2intrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/bmiintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/clflushoptintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/clwbintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/emmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/f16cintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/float.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/fma4intrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/fxsrintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/ia32intrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h" + 
"/usr/lib/gcc/x86_64-linux-gnu/5/include/lwpintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/lzcntintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/mm3dnow.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/mm_malloc.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/mmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/mwaitxintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/pcommitintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/pmmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/popcntintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/prfchwintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/rdseedintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/rtmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/shaintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/smmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/stdarg.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/stddef.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/stdint.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/tbmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/tmmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/wmmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/x86intrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xmmintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xopintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsavecintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveoptintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsavesintrin.h" + "/usr/lib/gcc/x86_64-linux-gnu/5/include/xtestintrin.h" +) + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache new file mode 100644 index 0000000000000000000000000000000000000000..f1de1eb76c952e08c1055a7b226a3b20cb722e8b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache @@ -0,0 +1,602 @@ 
+#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +sstream +- +../../tensor_runtime/include/tensor.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +types.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc +stdio.h +- +stdlib.h +- +unistd.h +- +../../tensor_runtime/include/tensor_runtime.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +../include/utils.h +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h +cuda_runtime.h +- +device_launch_parameters.h +- +cublas_v2.h +- +cudnn.h +- +cublas_api.h +- +cuda_fp16.h +- +driver_types.h +- + +/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +stdio.h +- +cstdlib +- +cmath +- +memory +- +string +- + +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +surface_types.h +/software/cuda-9.1/include/surface_types.h +texture_types.h +/software/cuda-9.1/include/texture_types.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/common_functions.h 
+crt/common_functions.h +/software/cuda-9.1/include/crt/common_functions.h + +/software/cuda-9.1/include/crt/common_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +string.h +- +time.h +- +new +- +stdio.h +- +stdlib.h +- +assert.h +- +cuda_device_runtime_api.h +/software/cuda-9.1/include/crt/cuda_device_runtime_api.h +math_functions.h +/software/cuda-9.1/include/crt/math_functions.h + +/software/cuda-9.1/include/crt/device_double_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_double_functions.hpp +/software/cuda-9.1/include/crt/device_double_functions.hpp + +/software/cuda-9.1/include/crt/device_double_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/device_functions.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +device_functions.hpp +/software/cuda-9.1/include/crt/device_functions.hpp +device_atomic_functions.h +/software/cuda-9.1/include/crt/device_atomic_functions.h +device_double_functions.h +/software/cuda-9.1/include/crt/device_double_functions.h +sm_20_atomic_functions.h +/software/cuda-9.1/include/crt/sm_20_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/crt/sm_32_atomic_functions.h +sm_35_atomic_functions.h +/software/cuda-9.1/include/crt/sm_35_atomic_functions.h +sm_60_atomic_functions.h +/software/cuda-9.1/include/crt/sm_60_atomic_functions.h +sm_20_intrinsics.h +/software/cuda-9.1/include/crt/sm_20_intrinsics.h +sm_30_intrinsics.h 
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/crt/sm_32_intrinsics.h +sm_35_intrinsics.h +/software/cuda-9.1/include/crt/sm_35_intrinsics.h +sm_61_intrinsics.h +/software/cuda-9.1/include/crt/sm_61_intrinsics.h +sm_70_rt.h +/software/cuda-9.1/include/crt/sm_70_rt.h +surface_functions.h +/software/cuda-9.1/include/crt/surface_functions.h +texture_fetch_functions.h +/software/cuda-9.1/include/crt/texture_fetch_functions.h +texture_indirect_functions.h +/software/cuda-9.1/include/crt/texture_indirect_functions.h +surface_indirect_functions.h +/software/cuda-9.1/include/crt/surface_indirect_functions.h + +/software/cuda-9.1/include/crt/device_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/func_macro.h + +/software/cuda-9.1/include/crt/host_config.h +features.h +- +crtdefs.h +- +corecrt.h +- +cstdarg +- + +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/crt/math_functions.h +__config +- +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math.h +- +stdlib.h +- +cmath +- +cstdlib +- +crt/func_macro.h +- +math_functions.hpp +/software/cuda-9.1/include/crt/math_functions.hpp + +/software/cuda-9.1/include/crt/math_functions.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h +math_constants.h +/software/cuda-9.1/include/crt/math_constants.h +crt/func_macro.h +- + +/software/cuda-9.1/include/crt/sm_70_rt.h +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h 
+/software/cuda-9.1/include/crt/host_defines.h +sm_70_rt.hpp +/software/cuda-9.1/include/crt/sm_70_rt.hpp + +/software/cuda-9.1/include/crt/sm_70_rt.hpp +builtin_types.h +/software/cuda-9.1/include/crt/builtin_types.h +device_types.h +/software/cuda-9.1/include/crt/device_types.h +host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/cuComplex.h +math.h +- +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/cublas_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuComplex.h +/software/cuda-9.1/include/cuComplex.h +cuda_fp16.h +- +library_types.h +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/cublas_v2.h +cublas_api.h +/software/cuda-9.1/include/cublas_api.h + +/software/cuda-9.1/include/cuda_device_runtime_api.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/cuda_fp16.h +cuda_fp16.hpp +/software/cuda-9.1/include/cuda_fp16.hpp + +/software/cuda-9.1/include/cuda_fp16.hpp +utility +- + +/software/cuda-9.1/include/cuda_runtime.h +host_config.h +/software/cuda-9.1/include/host_config.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +library_types.h +/software/cuda-9.1/include/library_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +cuda_runtime_api.h +/software/cuda-9.1/include/cuda_runtime_api.h +driver_functions.h +/software/cuda-9.1/include/driver_functions.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_functions.h +/software/cuda-9.1/include/vector_functions.h +nvrtc_device_runtime.h +/software/cuda-9.1/include/nvrtc_device_runtime.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h 
+/software/cuda-9.1/include/cuda_texture_types.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +common_functions.h +/software/cuda-9.1/include/common_functions.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +device_functions.h +/software/cuda-9.1/include/device_functions.h +device_launch_parameters.h +/software/cuda-9.1/include/device_launch_parameters.h +functional +- +utility +- + +/software/cuda-9.1/include/cuda_runtime_api.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_device_runtime_api.h +/software/cuda-9.1/include/cuda_device_runtime_api.h + +/software/cuda-9.1/include/cuda_surface_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/cuda_texture_types.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +channel_descriptor.h +/software/cuda-9.1/include/channel_descriptor.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + +/software/cuda-9.1/include/cudnn.h +driver_types.h +/software/cuda-9.1/include/driver_types.h +cuda_runtime.h +- + +/software/cuda-9.1/include/device_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +device_atomic_functions.hpp +/software/cuda-9.1/include/device_atomic_functions.hpp + 
+/software/cuda-9.1/include/device_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/device_functions.h +crt/device_functions.h +/software/cuda-9.1/include/crt/device_functions.h + +/software/cuda-9.1/include/device_launch_parameters.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/driver_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/driver_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +limits.h +- +stddef.h +- + +/software/cuda-9.1/include/host_config.h +crt/host_config.h +/software/cuda-9.1/include/crt/host_config.h + +/software/cuda-9.1/include/host_defines.h +crt/host_defines.h +/software/cuda-9.1/include/crt/host_defines.h + +/software/cuda-9.1/include/library_types.h + +/software/cuda-9.1/include/math_constants.h + +/software/cuda-9.1/include/sm_20_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_20_atomic_functions.hpp +/software/cuda-9.1/include/sm_20_atomic_functions.hpp + +/software/cuda-9.1/include/sm_20_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_20_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h 
+/software/cuda-9.1/include/host_defines.h +sm_20_intrinsics.hpp +/software/cuda-9.1/include/sm_20_intrinsics.hpp + +/software/cuda-9.1/include/sm_20_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_30_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_30_intrinsics.hpp +/software/cuda-9.1/include/sm_30_intrinsics.hpp + +/software/cuda-9.1/include/sm_30_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_atomic_functions.hpp +/software/cuda-9.1/include/sm_32_atomic_functions.hpp + +/software/cuda-9.1/include/sm_32_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_32_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_32_intrinsics.hpp +/software/cuda-9.1/include/sm_32_intrinsics.hpp + +/software/cuda-9.1/include/sm_32_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_35_atomic_functions.h +sm_32_atomic_functions.h +/software/cuda-9.1/include/sm_32_atomic_functions.h + 
+/software/cuda-9.1/include/sm_35_intrinsics.h +sm_32_intrinsics.h +/software/cuda-9.1/include/sm_32_intrinsics.h + +/software/cuda-9.1/include/sm_60_atomic_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_60_atomic_functions.hpp +/software/cuda-9.1/include/sm_60_atomic_functions.hpp + +/software/cuda-9.1/include/sm_60_atomic_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/sm_61_intrinsics.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +sm_61_intrinsics.hpp +/software/cuda-9.1/include/sm_61_intrinsics.hpp + +/software/cuda-9.1/include/sm_61_intrinsics.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +device_types.h +/software/cuda-9.1/include/device_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_surface_types.h +/software/cuda-9.1/include/cuda_surface_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +surface_types.h +/software/cuda-9.1/include/surface_types.h + +/software/cuda-9.1/include/surface_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/surface_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/texture_fetch_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +cuda_texture_types.h +/software/cuda-9.1/include/cuda_texture_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +texture_types.h +/software/cuda-9.1/include/texture_types.h + 
+/software/cuda-9.1/include/texture_indirect_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + +/software/cuda-9.1/include/texture_types.h +driver_types.h +/software/cuda-9.1/include/driver_types.h + +/software/cuda-9.1/include/vector_functions.h +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h +vector_functions.hpp +/software/cuda-9.1/include/vector_functions.hpp + +/software/cuda-9.1/include/vector_functions.hpp +builtin_types.h +/software/cuda-9.1/include/builtin_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h +vector_types.h +/software/cuda-9.1/include/vector_types.h + +/software/cuda-9.1/include/vector_types.h +host_defines.h +/software/cuda-9.1/include/host_defines.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ada151a56820ec3f45cc29fe1047cbc6e0bc4dd0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake @@ -0,0 +1,29 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# Preprocessor definitions for this target. 
+set(CMAKE_TARGET_DEFINITIONS_CXX + "NO_INJECTION" + ) + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "/software/cuda-9.1/lib64" + "/software/cuda-9.1/lib64/include" + "/software/cuda-9.1/include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake" + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make new file mode 100644 index 0000000000000000000000000000000000000000..5c43744f7421581364eff3f09324e7ab2e4605d0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make @@ -0,0 +1,117 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. 
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +# Include any dependencies generated for this target. +include CMakeFiles/test_ops.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/test_ops.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/test_ops.dir/flags.make + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: CMakeFiles/test_ops.dir/flags.make +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/src/test_ops.cc + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc > CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc -o 
CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires: + +.PHONY : CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides.build +.PHONY : CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides.build: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o + + +# Object files for target test_ops +test_ops_OBJECTS = \ +"CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o" + +# External object files for target test_ops +test_ops_EXTERNAL_OBJECTS = + +test_ops: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o +test_ops: CMakeFiles/test_ops.dir/build.make +test_ops: libtensor_runtime.a +test_ops: /software/cuda-9.1/lib64/libcudart_static.a +test_ops: /usr/lib/x86_64-linux-gnu/librt.so +test_ops: /software/cuda-9.1/lib64/libcublas.so +test_ops: CMakeFiles/test_ops.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable test_ops" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/test_ops.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. 
+CMakeFiles/test_ops.dir/build: test_ops + +.PHONY : CMakeFiles/test_ops.dir/build + +CMakeFiles/test_ops.dir/requires: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires + +.PHONY : CMakeFiles/test_ops.dir/requires + +CMakeFiles/test_ops.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/test_ops.dir/cmake_clean.cmake +.PHONY : CMakeFiles/test_ops.dir/clean + +CMakeFiles/test_ops.dir/depend: + cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/test_ops.dir/depend + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8b746a56138925ea678e94e7c8d20b911a5f6197 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o" + "test_ops.pdb" + "test_ops" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/test_ops.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal new file mode 100644 index 0000000000000000000000000000000000000000..d786512ef56577e0da723c0e49ee5dd6454d0c33 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h + /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h + /software/cuda-9.1/include/builtin_types.h + /software/cuda-9.1/include/channel_descriptor.h + /software/cuda-9.1/include/common_functions.h + /software/cuda-9.1/include/crt/common_functions.h + /software/cuda-9.1/include/crt/device_double_functions.h + /software/cuda-9.1/include/crt/device_double_functions.hpp + /software/cuda-9.1/include/crt/device_functions.h + /software/cuda-9.1/include/crt/device_functions.hpp + /software/cuda-9.1/include/crt/func_macro.h + /software/cuda-9.1/include/crt/host_config.h + /software/cuda-9.1/include/crt/host_defines.h + /software/cuda-9.1/include/crt/math_functions.h + /software/cuda-9.1/include/crt/math_functions.hpp + /software/cuda-9.1/include/crt/sm_70_rt.h + /software/cuda-9.1/include/crt/sm_70_rt.hpp + /software/cuda-9.1/include/cuComplex.h + /software/cuda-9.1/include/cublas_api.h + /software/cuda-9.1/include/cublas_v2.h + 
/software/cuda-9.1/include/cuda_device_runtime_api.h + /software/cuda-9.1/include/cuda_fp16.h + /software/cuda-9.1/include/cuda_fp16.hpp + /software/cuda-9.1/include/cuda_runtime.h + /software/cuda-9.1/include/cuda_runtime_api.h + /software/cuda-9.1/include/cuda_surface_types.h + /software/cuda-9.1/include/cuda_texture_types.h + /software/cuda-9.1/include/cudnn.h + /software/cuda-9.1/include/device_atomic_functions.h + /software/cuda-9.1/include/device_atomic_functions.hpp + /software/cuda-9.1/include/device_functions.h + /software/cuda-9.1/include/device_launch_parameters.h + /software/cuda-9.1/include/device_types.h + /software/cuda-9.1/include/driver_functions.h + /software/cuda-9.1/include/driver_types.h + /software/cuda-9.1/include/host_config.h + /software/cuda-9.1/include/host_defines.h + /software/cuda-9.1/include/library_types.h + /software/cuda-9.1/include/math_constants.h + /software/cuda-9.1/include/sm_20_atomic_functions.h + /software/cuda-9.1/include/sm_20_atomic_functions.hpp + /software/cuda-9.1/include/sm_20_intrinsics.h + /software/cuda-9.1/include/sm_20_intrinsics.hpp + /software/cuda-9.1/include/sm_30_intrinsics.h + /software/cuda-9.1/include/sm_30_intrinsics.hpp + /software/cuda-9.1/include/sm_32_atomic_functions.h + /software/cuda-9.1/include/sm_32_atomic_functions.hpp + /software/cuda-9.1/include/sm_32_intrinsics.h + /software/cuda-9.1/include/sm_32_intrinsics.hpp + /software/cuda-9.1/include/sm_35_atomic_functions.h + /software/cuda-9.1/include/sm_35_intrinsics.h + /software/cuda-9.1/include/sm_60_atomic_functions.h + /software/cuda-9.1/include/sm_60_atomic_functions.hpp + /software/cuda-9.1/include/sm_61_intrinsics.h + /software/cuda-9.1/include/sm_61_intrinsics.hpp + /software/cuda-9.1/include/surface_functions.h + /software/cuda-9.1/include/surface_indirect_functions.h + /software/cuda-9.1/include/surface_types.h + /software/cuda-9.1/include/texture_fetch_functions.h + /software/cuda-9.1/include/texture_indirect_functions.h + 
/software/cuda-9.1/include/texture_types.h + /software/cuda-9.1/include/vector_functions.h + /software/cuda-9.1/include/vector_functions.hpp + /software/cuda-9.1/include/vector_types.h diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make new file mode 100644 index 0000000000000000000000000000000000000000..86ecbbbd064b25e07fe9510e44340d64070eb592 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make @@ -0,0 +1,71 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/include/types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/include/utils.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/src/test_ops.cc +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../tensor_runtime/include/tensor.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../tensor_runtime/include/tensor_runtime.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/builtin_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/channel_descriptor.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/common_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/common_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp 
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/func_macro.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/host_config.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/host_defines.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/math_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuComplex.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cublas_api.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cublas_v2.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_fp16.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_runtime.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_surface_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_texture_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cudnn.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_atomic_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: 
/software/cuda-9.1/include/device_atomic_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_launch_parameters.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/driver_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/driver_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/host_config.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/host_defines.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/library_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/math_constants.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: 
/software/cuda-9.1/include/sm_32_intrinsics.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_types.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_functions.h +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_functions.hpp +CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_types.h + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make new file mode 100644 index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make @@ -0,0 +1,10 @@ +# 
CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = -DNO_INJECTION + +CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include + diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt new file mode 100644 index 0000000000000000000000000000000000000000..1bcafeb846353a7ae25f096bbf90c6af9ac1e476 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -std=c++11 CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o -o test_ops -L/software/cuda-9.1/lib64 -L/software/cuda-9.1/lib64/lib -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make new file mode 100644 index 0000000000000000000000000000000000000000..9fd0bf530ff91ac241731c4d5429c9b46c9d34a9 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 25 +CMAKE_PROGRESS_2 = 26 + diff --git a/llvm/projects/hpvm-tensor-rt/build/Makefile b/llvm/projects/hpvm-tensor-rt/build/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f79ccd9d8b79aeef44733439fbb2af41d46d1f56 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/Makefile @@ -0,0 +1,676 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.5 + +# Default target executed when no arguments are given to make. 
+default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." + /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 
+ /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks + $(MAKE) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named lenet_keras_half + +# Build rule for target. +lenet_keras_half: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 lenet_keras_half +.PHONY : lenet_keras_half + +# fast build rule for target. +lenet_keras_half/fast: + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/build +.PHONY : lenet_keras_half/fast + +#============================================================================= +# Target rules for targets named lenet_tanh_half + +# Build rule for target. +lenet_tanh_half: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 lenet_tanh_half +.PHONY : lenet_tanh_half + +# fast build rule for target. 
+lenet_tanh_half/fast: + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/build +.PHONY : lenet_tanh_half/fast + +#============================================================================= +# Target rules for targets named fc4_half + +# Build rule for target. +fc4_half: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc4_half +.PHONY : fc4_half + +# fast build rule for target. +fc4_half/fast: + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/build +.PHONY : fc4_half/fast + +#============================================================================= +# Target rules for targets named fc3_clipped + +# Build rule for target. +fc3_clipped: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc3_clipped +.PHONY : fc3_clipped + +# fast build rule for target. +fc3_clipped/fast: + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/build +.PHONY : fc3_clipped/fast + +#============================================================================= +# Target rules for targets named fc2_clipped + +# Build rule for target. +fc2_clipped: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc2_clipped +.PHONY : fc2_clipped + +# fast build rule for target. +fc2_clipped/fast: + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/build +.PHONY : fc2_clipped/fast + +#============================================================================= +# Target rules for targets named test_ops + +# Build rule for target. +test_ops: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 test_ops +.PHONY : test_ops + +# fast build rule for target. +test_ops/fast: + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/build +.PHONY : test_ops/fast + +#============================================================================= +# Target rules for targets named fc3_half + +# Build rule for target. 
+fc3_half: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc3_half +.PHONY : fc3_half + +# fast build rule for target. +fc3_half/fast: + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/build +.PHONY : fc3_half/fast + +#============================================================================= +# Target rules for targets named tensor_runtime + +# Build rule for target. +tensor_runtime: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 tensor_runtime +.PHONY : tensor_runtime + +# fast build rule for target. +tensor_runtime/fast: + $(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/build +.PHONY : tensor_runtime/fast + +#============================================================================= +# Target rules for targets named fc4_clipped + +# Build rule for target. +fc4_clipped: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc4_clipped +.PHONY : fc4_clipped + +# fast build rule for target. +fc4_clipped/fast: + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/build +.PHONY : fc4_clipped/fast + +#============================================================================= +# Target rules for targets named cifar_keras + +# Build rule for target. +cifar_keras: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 cifar_keras +.PHONY : cifar_keras + +# fast build rule for target. +cifar_keras/fast: + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/build +.PHONY : cifar_keras/fast + +#============================================================================= +# Target rules for targets named lenet_keras + +# Build rule for target. +lenet_keras: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 lenet_keras +.PHONY : lenet_keras + +# fast build rule for target. 
+lenet_keras/fast: + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/build +.PHONY : lenet_keras/fast + +#============================================================================= +# Target rules for targets named lenet_tanh + +# Build rule for target. +lenet_tanh: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 lenet_tanh +.PHONY : lenet_tanh + +# fast build rule for target. +lenet_tanh/fast: + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/build +.PHONY : lenet_tanh/fast + +#============================================================================= +# Target rules for targets named fc2_half + +# Build rule for target. +fc2_half: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 fc2_half +.PHONY : fc2_half + +# fast build rule for target. +fc2_half/fast: + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/build +.PHONY : fc2_half/fast + +dnn_sources/src/cifar_keras.o: dnn_sources/src/cifar_keras.cc.o + +.PHONY : dnn_sources/src/cifar_keras.o + +# target to build an object file +dnn_sources/src/cifar_keras.cc.o: + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o +.PHONY : dnn_sources/src/cifar_keras.cc.o + +dnn_sources/src/cifar_keras.i: dnn_sources/src/cifar_keras.cc.i + +.PHONY : dnn_sources/src/cifar_keras.i + +# target to preprocess a source file +dnn_sources/src/cifar_keras.cc.i: + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i +.PHONY : dnn_sources/src/cifar_keras.cc.i + +dnn_sources/src/cifar_keras.s: dnn_sources/src/cifar_keras.cc.s + +.PHONY : dnn_sources/src/cifar_keras.s + +# target to generate assembly for a file +dnn_sources/src/cifar_keras.cc.s: + $(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s +.PHONY : dnn_sources/src/cifar_keras.cc.s + +dnn_sources/src/fc2_clipped.o: 
dnn_sources/src/fc2_clipped.cc.o + +.PHONY : dnn_sources/src/fc2_clipped.o + +# target to build an object file +dnn_sources/src/fc2_clipped.cc.o: + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o +.PHONY : dnn_sources/src/fc2_clipped.cc.o + +dnn_sources/src/fc2_clipped.i: dnn_sources/src/fc2_clipped.cc.i + +.PHONY : dnn_sources/src/fc2_clipped.i + +# target to preprocess a source file +dnn_sources/src/fc2_clipped.cc.i: + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i +.PHONY : dnn_sources/src/fc2_clipped.cc.i + +dnn_sources/src/fc2_clipped.s: dnn_sources/src/fc2_clipped.cc.s + +.PHONY : dnn_sources/src/fc2_clipped.s + +# target to generate assembly for a file +dnn_sources/src/fc2_clipped.cc.s: + $(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s +.PHONY : dnn_sources/src/fc2_clipped.cc.s + +dnn_sources/src/fc3_clipped.o: dnn_sources/src/fc3_clipped.cc.o + +.PHONY : dnn_sources/src/fc3_clipped.o + +# target to build an object file +dnn_sources/src/fc3_clipped.cc.o: + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o +.PHONY : dnn_sources/src/fc3_clipped.cc.o + +dnn_sources/src/fc3_clipped.i: dnn_sources/src/fc3_clipped.cc.i + +.PHONY : dnn_sources/src/fc3_clipped.i + +# target to preprocess a source file +dnn_sources/src/fc3_clipped.cc.i: + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i +.PHONY : dnn_sources/src/fc3_clipped.cc.i + +dnn_sources/src/fc3_clipped.s: dnn_sources/src/fc3_clipped.cc.s + +.PHONY : dnn_sources/src/fc3_clipped.s + +# target to generate assembly for a file +dnn_sources/src/fc3_clipped.cc.s: + $(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s +.PHONY : dnn_sources/src/fc3_clipped.cc.s + 
+dnn_sources/src/fc4_clipped.o: dnn_sources/src/fc4_clipped.cc.o + +.PHONY : dnn_sources/src/fc4_clipped.o + +# target to build an object file +dnn_sources/src/fc4_clipped.cc.o: + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o +.PHONY : dnn_sources/src/fc4_clipped.cc.o + +dnn_sources/src/fc4_clipped.i: dnn_sources/src/fc4_clipped.cc.i + +.PHONY : dnn_sources/src/fc4_clipped.i + +# target to preprocess a source file +dnn_sources/src/fc4_clipped.cc.i: + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i +.PHONY : dnn_sources/src/fc4_clipped.cc.i + +dnn_sources/src/fc4_clipped.s: dnn_sources/src/fc4_clipped.cc.s + +.PHONY : dnn_sources/src/fc4_clipped.s + +# target to generate assembly for a file +dnn_sources/src/fc4_clipped.cc.s: + $(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s +.PHONY : dnn_sources/src/fc4_clipped.cc.s + +dnn_sources/src/half/fc2_half.o: dnn_sources/src/half/fc2_half.cc.o + +.PHONY : dnn_sources/src/half/fc2_half.o + +# target to build an object file +dnn_sources/src/half/fc2_half.cc.o: + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o +.PHONY : dnn_sources/src/half/fc2_half.cc.o + +dnn_sources/src/half/fc2_half.i: dnn_sources/src/half/fc2_half.cc.i + +.PHONY : dnn_sources/src/half/fc2_half.i + +# target to preprocess a source file +dnn_sources/src/half/fc2_half.cc.i: + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i +.PHONY : dnn_sources/src/half/fc2_half.cc.i + +dnn_sources/src/half/fc2_half.s: dnn_sources/src/half/fc2_half.cc.s + +.PHONY : dnn_sources/src/half/fc2_half.s + +# target to generate assembly for a file +dnn_sources/src/half/fc2_half.cc.s: + $(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s 
+.PHONY : dnn_sources/src/half/fc2_half.cc.s + +dnn_sources/src/half/fc3_half.o: dnn_sources/src/half/fc3_half.cc.o + +.PHONY : dnn_sources/src/half/fc3_half.o + +# target to build an object file +dnn_sources/src/half/fc3_half.cc.o: + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o +.PHONY : dnn_sources/src/half/fc3_half.cc.o + +dnn_sources/src/half/fc3_half.i: dnn_sources/src/half/fc3_half.cc.i + +.PHONY : dnn_sources/src/half/fc3_half.i + +# target to preprocess a source file +dnn_sources/src/half/fc3_half.cc.i: + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i +.PHONY : dnn_sources/src/half/fc3_half.cc.i + +dnn_sources/src/half/fc3_half.s: dnn_sources/src/half/fc3_half.cc.s + +.PHONY : dnn_sources/src/half/fc3_half.s + +# target to generate assembly for a file +dnn_sources/src/half/fc3_half.cc.s: + $(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s +.PHONY : dnn_sources/src/half/fc3_half.cc.s + +dnn_sources/src/half/fc4_half.o: dnn_sources/src/half/fc4_half.cc.o + +.PHONY : dnn_sources/src/half/fc4_half.o + +# target to build an object file +dnn_sources/src/half/fc4_half.cc.o: + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o +.PHONY : dnn_sources/src/half/fc4_half.cc.o + +dnn_sources/src/half/fc4_half.i: dnn_sources/src/half/fc4_half.cc.i + +.PHONY : dnn_sources/src/half/fc4_half.i + +# target to preprocess a source file +dnn_sources/src/half/fc4_half.cc.i: + $(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i +.PHONY : dnn_sources/src/half/fc4_half.cc.i + +dnn_sources/src/half/fc4_half.s: dnn_sources/src/half/fc4_half.cc.s + +.PHONY : dnn_sources/src/half/fc4_half.s + +# target to generate assembly for a file +dnn_sources/src/half/fc4_half.cc.s: + $(MAKE) -f 
CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s +.PHONY : dnn_sources/src/half/fc4_half.cc.s + +dnn_sources/src/half/lenet_keras_half.o: dnn_sources/src/half/lenet_keras_half.cc.o + +.PHONY : dnn_sources/src/half/lenet_keras_half.o + +# target to build an object file +dnn_sources/src/half/lenet_keras_half.cc.o: + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o +.PHONY : dnn_sources/src/half/lenet_keras_half.cc.o + +dnn_sources/src/half/lenet_keras_half.i: dnn_sources/src/half/lenet_keras_half.cc.i + +.PHONY : dnn_sources/src/half/lenet_keras_half.i + +# target to preprocess a source file +dnn_sources/src/half/lenet_keras_half.cc.i: + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i +.PHONY : dnn_sources/src/half/lenet_keras_half.cc.i + +dnn_sources/src/half/lenet_keras_half.s: dnn_sources/src/half/lenet_keras_half.cc.s + +.PHONY : dnn_sources/src/half/lenet_keras_half.s + +# target to generate assembly for a file +dnn_sources/src/half/lenet_keras_half.cc.s: + $(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s +.PHONY : dnn_sources/src/half/lenet_keras_half.cc.s + +dnn_sources/src/half/lenet_tanh_half.o: dnn_sources/src/half/lenet_tanh_half.cc.o + +.PHONY : dnn_sources/src/half/lenet_tanh_half.o + +# target to build an object file +dnn_sources/src/half/lenet_tanh_half.cc.o: + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o +.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.o + +dnn_sources/src/half/lenet_tanh_half.i: dnn_sources/src/half/lenet_tanh_half.cc.i + +.PHONY : dnn_sources/src/half/lenet_tanh_half.i + +# target to preprocess a source file +dnn_sources/src/half/lenet_tanh_half.cc.i: + $(MAKE) -f 
CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i +.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.i + +dnn_sources/src/half/lenet_tanh_half.s: dnn_sources/src/half/lenet_tanh_half.cc.s + +.PHONY : dnn_sources/src/half/lenet_tanh_half.s + +# target to generate assembly for a file +dnn_sources/src/half/lenet_tanh_half.cc.s: + $(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s +.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.s + +dnn_sources/src/lenet2_tanh.o: dnn_sources/src/lenet2_tanh.cc.o + +.PHONY : dnn_sources/src/lenet2_tanh.o + +# target to build an object file +dnn_sources/src/lenet2_tanh.cc.o: + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o +.PHONY : dnn_sources/src/lenet2_tanh.cc.o + +dnn_sources/src/lenet2_tanh.i: dnn_sources/src/lenet2_tanh.cc.i + +.PHONY : dnn_sources/src/lenet2_tanh.i + +# target to preprocess a source file +dnn_sources/src/lenet2_tanh.cc.i: + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i +.PHONY : dnn_sources/src/lenet2_tanh.cc.i + +dnn_sources/src/lenet2_tanh.s: dnn_sources/src/lenet2_tanh.cc.s + +.PHONY : dnn_sources/src/lenet2_tanh.s + +# target to generate assembly for a file +dnn_sources/src/lenet2_tanh.cc.s: + $(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s +.PHONY : dnn_sources/src/lenet2_tanh.cc.s + +dnn_sources/src/lenet_keras.o: dnn_sources/src/lenet_keras.cc.o + +.PHONY : dnn_sources/src/lenet_keras.o + +# target to build an object file +dnn_sources/src/lenet_keras.cc.o: + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o +.PHONY : dnn_sources/src/lenet_keras.cc.o + +dnn_sources/src/lenet_keras.i: dnn_sources/src/lenet_keras.cc.i + +.PHONY : 
dnn_sources/src/lenet_keras.i + +# target to preprocess a source file +dnn_sources/src/lenet_keras.cc.i: + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i +.PHONY : dnn_sources/src/lenet_keras.cc.i + +dnn_sources/src/lenet_keras.s: dnn_sources/src/lenet_keras.cc.s + +.PHONY : dnn_sources/src/lenet_keras.s + +# target to generate assembly for a file +dnn_sources/src/lenet_keras.cc.s: + $(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s +.PHONY : dnn_sources/src/lenet_keras.cc.s + +dnn_sources/src/test_ops.o: dnn_sources/src/test_ops.cc.o + +.PHONY : dnn_sources/src/test_ops.o + +# target to build an object file +dnn_sources/src/test_ops.cc.o: + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o +.PHONY : dnn_sources/src/test_ops.cc.o + +dnn_sources/src/test_ops.i: dnn_sources/src/test_ops.cc.i + +.PHONY : dnn_sources/src/test_ops.i + +# target to preprocess a source file +dnn_sources/src/test_ops.cc.i: + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i +.PHONY : dnn_sources/src/test_ops.cc.i + +dnn_sources/src/test_ops.s: dnn_sources/src/test_ops.cc.s + +.PHONY : dnn_sources/src/test_ops.s + +# target to generate assembly for a file +dnn_sources/src/test_ops.cc.s: + $(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s +.PHONY : dnn_sources/src/test_ops.cc.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... lenet_keras_half" + @echo "... lenet_tanh_half" + @echo "... rebuild_cache" + @echo "... fc4_half" + @echo "... fc3_clipped" + @echo "... fc2_clipped" + @echo "... test_ops" + @echo "... fc3_half" + @echo "... 
tensor_runtime" + @echo "... fc4_clipped" + @echo "... cifar_keras" + @echo "... lenet_keras" + @echo "... lenet_tanh" + @echo "... fc2_half" + @echo "... dnn_sources/src/cifar_keras.o" + @echo "... dnn_sources/src/cifar_keras.i" + @echo "... dnn_sources/src/cifar_keras.s" + @echo "... dnn_sources/src/fc2_clipped.o" + @echo "... dnn_sources/src/fc2_clipped.i" + @echo "... dnn_sources/src/fc2_clipped.s" + @echo "... dnn_sources/src/fc3_clipped.o" + @echo "... dnn_sources/src/fc3_clipped.i" + @echo "... dnn_sources/src/fc3_clipped.s" + @echo "... dnn_sources/src/fc4_clipped.o" + @echo "... dnn_sources/src/fc4_clipped.i" + @echo "... dnn_sources/src/fc4_clipped.s" + @echo "... dnn_sources/src/half/fc2_half.o" + @echo "... dnn_sources/src/half/fc2_half.i" + @echo "... dnn_sources/src/half/fc2_half.s" + @echo "... dnn_sources/src/half/fc3_half.o" + @echo "... dnn_sources/src/half/fc3_half.i" + @echo "... dnn_sources/src/half/fc3_half.s" + @echo "... dnn_sources/src/half/fc4_half.o" + @echo "... dnn_sources/src/half/fc4_half.i" + @echo "... dnn_sources/src/half/fc4_half.s" + @echo "... dnn_sources/src/half/lenet_keras_half.o" + @echo "... dnn_sources/src/half/lenet_keras_half.i" + @echo "... dnn_sources/src/half/lenet_keras_half.s" + @echo "... dnn_sources/src/half/lenet_tanh_half.o" + @echo "... dnn_sources/src/half/lenet_tanh_half.i" + @echo "... dnn_sources/src/half/lenet_tanh_half.s" + @echo "... dnn_sources/src/lenet2_tanh.o" + @echo "... dnn_sources/src/lenet2_tanh.i" + @echo "... dnn_sources/src/lenet2_tanh.s" + @echo "... dnn_sources/src/lenet_keras.o" + @echo "... dnn_sources/src/lenet_keras.i" + @echo "... dnn_sources/src/lenet_keras.s" + @echo "... dnn_sources/src/test_ops.o" + @echo "... dnn_sources/src/test_ops.i" + @echo "... dnn_sources/src/test_ops.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. 
+ +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/llvm/projects/hpvm-tensor-rt/build/accuracy_summary b/llvm/projects/hpvm-tensor-rt/build/accuracy_summary new file mode 100644 index 0000000000000000000000000000000000000000..f38e1bf61fbc710c979585d52d994681584f04d8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/accuracy_summary @@ -0,0 +1,6 @@ +tensorGemm 2.000000 0.091064 0.000181 0.000000 0.017550 0.021991 0.000000 50176.000000 21609.626953 16725.333984 5575.111328 +tensorAdd 4.000000 0.125392 0.000249 0.000000 0.023913 0.029955 0.000000 64.000000 22.797260 12.800000 2.560000 +tensorClippedRelu 5.000000 0.024725 0.000057 0.000000 0.027869 0.034927 0.000000 64.000000 21.333334 10.666667 1.777778 +tensorGemm 2.000000 0.183891 0.001160 0.000000 0.017444 0.021799 0.000000 640.000000 275.632996 213.333328 71.111115 +tensorAdd 1.000000 0.152207 0.000961 0.000000 0.014370 0.018029 0.000000 5.000000 2.500000 2.500000 1.250000 +tensorClippedRelu 3.000000 0.004657 0.000077 0.000000 0.021192 0.026456 0.000000 5.000000 1.934264 1.250000 0.312500 diff --git a/llvm/projects/hpvm-tensor-rt/build/cifar_keras b/llvm/projects/hpvm-tensor-rt/build/cifar_keras new file mode 100755 index 0000000000000000000000000000000000000000..bed9ac1acc552cd8bb3713c6a1cc39dabea69428 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/cifar_keras differ diff --git a/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake b/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c4d523e117f78f2c5b6d4fd7c0d8fbdbd82a9011 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake @@ -0,0 
+1,44 @@ +# Install script for directory: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +if(CMAKE_INSTALL_COMPONENT) + set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +else() + set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +file(WRITE "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/${CMAKE_INSTALL_MANIFEST}" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff --git a/llvm/projects/hpvm-tensor-rt/build/fc2_clipped b/llvm/projects/hpvm-tensor-rt/build/fc2_clipped new file mode 100755 index 0000000000000000000000000000000000000000..de064ca7ccf662dee5f507f3c874a805cddad39d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc2_clipped differ diff --git a/llvm/projects/hpvm-tensor-rt/build/fc2_half b/llvm/projects/hpvm-tensor-rt/build/fc2_half new file mode 100755 index 0000000000000000000000000000000000000000..228adf76eca4bdf690d6521f1212f97784fe1e11 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/build/fc2_half differ diff --git a/llvm/projects/hpvm-tensor-rt/build/fc3_clipped b/llvm/projects/hpvm-tensor-rt/build/fc3_clipped new file mode 100755 index 0000000000000000000000000000000000000000..d2c498c0dca637923eebbe9818bebc0ce66152da Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc3_clipped differ diff --git a/llvm/projects/hpvm-tensor-rt/build/fc3_half b/llvm/projects/hpvm-tensor-rt/build/fc3_half new file mode 100755 index 0000000000000000000000000000000000000000..c45d7357018663c501efc2ed9abf75bbfc4a423c Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc3_half differ diff --git a/llvm/projects/hpvm-tensor-rt/build/fc4_clipped b/llvm/projects/hpvm-tensor-rt/build/fc4_clipped new file mode 100755 index 0000000000000000000000000000000000000000..ea5ac5b3b9429c2b2d5ad8eb3dcb155047dfb460 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc4_clipped differ diff --git a/llvm/projects/hpvm-tensor-rt/build/fc4_half b/llvm/projects/hpvm-tensor-rt/build/fc4_half new file mode 100755 index 0000000000000000000000000000000000000000..0c7951e0f363bda143b683ec0cffc0b3516c5b41 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc4_half differ diff --git a/llvm/projects/hpvm-tensor-rt/build/final_accuracy b/llvm/projects/hpvm-tensor-rt/build/final_accuracy new file mode 100644 index 0000000000000000000000000000000000000000..5dc9446b78c496341f0dbd837720d59340012c6f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/final_accuracy @@ -0,0 +1 @@ +98.699997 \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_keras b/llvm/projects/hpvm-tensor-rt/build/lenet_keras new file mode 100755 index 0000000000000000000000000000000000000000..6b4a876cf5379aeeeb193a9d76ba85fd1ebf5a13 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_keras differ diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half 
b/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half new file mode 100755 index 0000000000000000000000000000000000000000..e073bb378e153b36a0108d949b29c1cad48f8050 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half differ diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_tanh b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh new file mode 100755 index 0000000000000000000000000000000000000000..fcc2dcbe14b70f40034324ca889e029152893c5d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh differ diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half new file mode 100755 index 0000000000000000000000000000000000000000..f67f83391dd5c30a7ac8b3f3f363e287bfbe2ffb Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half differ diff --git a/llvm/projects/hpvm-tensor-rt/build/opentuner_flags b/llvm/projects/hpvm-tensor-rt/build/opentuner_flags new file mode 100644 index 0000000000000000000000000000000000000000..27c9ea9893123efc7982145e8c3423596d3cb75c --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/opentuner_flags @@ -0,0 +1,14 @@ +2 +4 +5 +2 +1 +3 +7 +2 +4 +1 +3 +7 +7 +1 diff --git a/llvm/projects/hpvm-tensor-rt/build/profile_data.txt b/llvm/projects/hpvm-tensor-rt/build/profile_data.txt new file mode 100644 index 0000000000000000000000000000000000000000..76fa1fd253668213c3fb5f206fc9a2b50fc08ce6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/build/profile_data.txt @@ -0,0 +1,30 @@ +tensorConv1 1543644372.403396 +tensorConv_end1 1543644372.406054 0.002658 +tensorAdd1 1543644372.406065 +tensorAdd_end1 1543644372.408212 0.002148 +tensorPooling1 1543644372.408217 +tensorPooling_end1 1543644372.408522 0.000305 +tensorTanh1 1543644372.408527 +tensorTanh_end1 1543644372.408551 0.000023 +tensorConv2 1543644372.408554 +tensorConv_end2 1543644372.414943 0.006389 +tensorAdd2 1543644372.414948 +tensorAdd_end2 1543644372.427007 0.012059 
+tensorPooling2 1543644372.427010 +tensorPooling_end2 1543644372.427250 0.000240 +tensorTanh2 1543644372.427255 +tensorTanh_end2 1543644372.427266 0.000010 +tensorGemmGPU1 1543644372.427270 +tensorGemmGPU_end1 1543644372.432362 0.005091 +tensorAdd3 1543644372.432367 +tensorAdd_end3 1543644372.435863 0.003496 +tensorTanh3 1543644372.435866 +tensorTanh_end3 1543644372.435877 0.000010 +tensorGemmGPU2 1543644372.435880 +tensorGemmGPU_end2 1543644372.436120 0.000240 +tensorAdd4 1543644372.436124 +tensorAdd_end4 1543644372.436202 0.000078 +tensorTanh4 1543644372.436205 +tensorTanh_end4 1543644372.436215 0.000009 +tensorSoftmax1 1543644372.436220 +tensorSoftmax_end1 1543644372.436362 0.000142 diff --git a/llvm/projects/hpvm-tensor-rt/build/test_ops b/llvm/projects/hpvm-tensor-rt/build/test_ops new file mode 100755 index 0000000000000000000000000000000000000000..af87a88704c7eb268f3e5fc2fdcb003f4439e9fa Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/test_ops differ diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h new file mode 100644 index 0000000000000000000000000000000000000000..4eaf88e6d613c51a5a75ef8ce73b55a3410f1dbd --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h @@ -0,0 +1,148 @@ + + +#ifndef OP_OVERHEADS_HEADER +#define OP_OVERHEADS_HEADER + + +#include <sstream> +#include "../../tensor_runtime/include/tensor.h" +#include "types.h" + + +float scale_down_factor = 10000.0; +float error_factor = 0.1; +std::string result_str = ""; + + +// TODO: Every routine needs testing + + +// private function +static float getScaledComps(double total_comps, int error_scale){ + + total_comps = total_comps / scale_down_factor; + float comp_scale = 1.0 + (error_factor * error_scale); + total_comps = total_comps / comp_scale; + + return total_comps; +} + + +static void addNormToResult(float comps){ + + std::ostringstream ss; + ss << std::fixed 
<< comps; + + result_str.append( std::string(ss.str()) ); + result_str.append("\t"); +} + + + +static void addCompsToResult(float comps){ + + std::ostringstream ss; + ss << std::fixed << comps; + + result_str.append( std::string(ss.str()) ); + result_str.append("\n"); +} + + +void add_conv_overheads(void* input_ptr, void* filter_ptr, + int strideA, int strideB, int error_scale){ + + Tensor* input = (Tensor*) input_ptr; + Tensor* filter = (Tensor*) filter_ptr; + +} + + +void add_gemm_overheads(void* lhs_ptr, void* rhs_ptr, int error_scale){ + + Tensor* lhs = (Tensor*) lhs_ptr; + Tensor* rhs = (Tensor*) rhs_ptr; + + int m = lhs->dims.dim_sizes[0]; + // The rhs last dimension must contain the neurons + int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons + int k = 1; + + // Flattening the dimensions after the batch dimension + for (int j = 1 ; j < lhs->dims.num_dims; j++){ + k = k * lhs->dims.dim_sizes[j]; // input neurons + } + + int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; + // Dimension-note: Check if k is same across the two tensors + printf("m = %d, n = %d, k = %d \n", m, n, k); + + if(rhs_k != k){ + printf("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); + abort(); + } + + double total_comps = m * n * rhs_k * 1.0; + float scaled_comps = getScaledComps(total_comps, error_scale); + + printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n", + error_scale, total_comps, scaled_comps); + + addCompsToResult(scaled_comps); + +} + + +void add_bias_overheads(void* input_ptr, int error_scale){ + + Tensor* input = (Tensor*) input_ptr; + + double total_comps = input->num_elems; + float scaled_comps = getScaledComps(total_comps, error_scale); + + printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n", + error_scale, total_comps, scaled_comps); + + addCompsToResult(scaled_comps); + +} + + +void add_relu_overheads(void* input_ptr, int error_scale){ + + Tensor* input = (Tensor*) input_ptr; + + double total_comps = 
input->num_elems; + float scaled_comps = getScaledComps(total_comps, error_scale); + + printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n", + error_scale, total_comps, scaled_comps); + + addCompsToResult(scaled_comps); + +} + +float add_pool_overheads(void* input_ptr, int kernel_size, + int stride_size, int error_scale){ + +} + + +void add_norms(void* norms_ptr){ + + Norm_t* norms = (Norm_t*) norms_ptr; + + addNormToResult(norms->l1_norm); + addNormToResult(norms->l2_norm); + addNormToResult(norms->inf_norm); + +} + +void dump_result(char* file_name){ + + FILE* fp = fopen(file_name, "w+"); + fwrite(result_str.c_str(), 1, result_str.length(), fp); + fclose(fp); +} + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h new file mode 100644 index 0000000000000000000000000000000000000000..3479a94abec9d6357edc26e4507ec80f8b060acb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h @@ -0,0 +1,38 @@ + +#ifndef TYPES_HEADER +#define TYPES_HEADER + + +struct Dimension_t{ + int num_dims; + size_t* dim_sizes; +}; + + +struct Tensor_t{ + int tensor_id; // used for indexing (in the tensor runtime) + int data_type; // {float_type, double_type, half_type, int_type} + int data_format; // {nchw, nhwc} + void* host_data; + size_t num_elems; // Total elements + size_t size_in_bytes; // Total size in bytes + struct Dimension_t dims; +}; + + +enum Tensor_type_t{ + float_type, + double_type, + half_type, + int_type +}; + + +// NOTE: Currently only NCHW is supported due to limited cuDNN support +enum Tensor_format_t{ + nchw, + nhwc +}; + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..bbbdff5e363cca9d82f065a3972368632cfef726 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h @@ -0,0 
+1,349 @@ + +// Header guards +#ifndef UTILS_HEADER +#define UTILS_HEADER + + +#include <sstream> +#include "../../tensor_runtime/include/tensor.h" +#include "types.h" + + +void printTensorInfo(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + if(tensor->gpu_data != NULL){ + printf("Successful cudaMalloc \n"); + } + + printf("tensor dims = %d \n", tensor->dims.num_dims); + printf("dim1_size = %d \n", tensor->dims.dim_sizes[0]); + printf("dim2_size = %d \n", tensor->dims.dim_sizes[1]); + printf("num_elems = %d \n", tensor->num_elems); +} + + +// FIXIT: Move this to debug.h and include in all files +void dumpWeightsToFile(char* file_name, void* weights_ptr){ + + struct Tensor* weights = (Tensor*) weights_ptr; + + // Move data back to host + hpvm_request_tensor(weights, 0); + + FILE* fp = fopen(file_name, "wb"); + if(fp == NULL){ + printf("File %s could not be created. Check if directory exists \n", file_name); + abort(); + } + + printf("size_in_bytes = %d \n", weights->size_in_bytes); + size_t bytes_written = fwrite(weights->host_data, 1, weights->size_in_bytes, fp); + printf("bytes_written = %d \n", bytes_written); + fclose(fp); +} + + + +void fillTensorWithOnes(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + // initialization is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems; i++){ + data_arr[i] = 1.0; + } + } +} + + +void fillWithOnesAndTwos(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + // initialization is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems/2; i++){ + data_arr[i] = 1.0; + } + + for(unsigned int i = tensor->num_elems/2; i < 
tensor->num_elems; i++){ + data_arr[i] = 2.0; + } + + } +} + + +void fillTensorWithNegOnes(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + // initialization is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems; i++){ + data_arr[i] = -1.0; + } + } +} + + +void fillTensorVals(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + // initialization is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems; i++){ + data_arr[i] = i + 1; + } + } +} + + +void printTensorValues(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + // printing is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems; i++){ + printf("%f,", data_arr[i]); + } + } + + printf("\n"); +} + + +void printTensorDims(void* tensor_ptr){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + printf("Num_elems = %d \n", tensor->num_elems); + for (int i = 0; i < tensor->dims.num_dims; i++){ + printf("dim[%d] = %d \n", i, tensor->dims.dim_sizes[i]); + } +} + + + +void compareTensors(void* tensor1_ptr, void* tensor2_ptr){ + + struct Tensor* tensor1 = (struct Tensor*) tensor1_ptr; + struct Tensor* tensor2 = (struct Tensor*) tensor2_ptr; + + hpvm_request_tensor(tensor1, 0); + hpvm_request_tensor(tensor2, 0); + + float* tensor_data1 = (float*) tensor1->host_data; + float* tensor_data2 = (float*) tensor2->host_data; + + for(unsigned int i = 0; i < tensor1->num_elems; i++){ + if(tensor_data1[i] != tensor_data2[i]){ + printf("Tensor data mismatch at index %d \n", i); + abort(); + } + } +} + + 
+ +void compareValues(void* tensor_ptr, float* data, size_t num_elems){ + + struct Tensor* tensor = (struct Tensor*) tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + float* tensor_data = (float*) tensor->host_data; + for(unsigned int i = 0; i < num_elems; i++){ + if(tensor_data[i] != data[i]){ + printf("Tensor data mismatch"); + abort(); + } + } +} + + +void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size, + int dim3_size, int dim4_size){ + + int type_size = 4; // NOTE: Assuming floating point tensors + int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; + int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; + uint8_t* file_data = (uint8_t*) malloc(sizeof(char) * num_elems); + float* tensor_data = (float*) malloc(sizeof(float) * num_elems); + int file_header_size = 16; + + FILE* file = fopen(file_name, "rb"); + if(file == NULL){ + printf("Data file %s is not found. Aborting... \n", file_name); + abort(); + } + + + fseek(file, file_header_size, SEEK_CUR); // Skipping the file header + size_t bytes_read = fread(file_data, 1, sizeof(uint8_t) * num_elems, file); + + for (size_t i = 0; i < num_elems; ++i){ + tensor_data[i] = (float) file_data[i] / 255.0f; + } + + printf("tensor_data[%d] = %f \n", 10, tensor_data[10]); + + // NOTE: Using NCHW format + struct Tensor* input = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, + dim3_size, dim4_size); + + initTensorData(input, tensor_data, size_in_bytes); + compareValues(input, tensor_data, num_elems); + + return input; +} + + + +struct Tensor* readTrainedWeights(char* file_name, int data_type, int dim1_size, int dim2_size, + int dim3_size, int dim4_size){ + + // FIXIT: Don't assume floating point types + int type_size = 4; // NOTE: Assuming floating point tensors + int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; + int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; + float* tensor_data = 
(float*) malloc(sizeof(float) * num_elems); + int file_header_size = 0; + + FILE* file = fopen(file_name, "rb"); + if(file == NULL){ + printf("Data file %s is not found. Aborting... \n", file_name); + abort(); + } + + fseek(file, file_header_size, SEEK_CUR); // Skipping the file header + size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); + + //printf("tensor_data[%d] = %f \n", num_elems-1, tensor_data[num_elems-1]); + + struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, + dim3_size, dim4_size); + + initTensorData(weights, tensor_data, size_in_bytes); + compareValues(weights, tensor_data, num_elems); + + return weights; +} + + +uint8_t* readLabels(char* labels_file, int num_labels){ + + int file_header_size = 8; + uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels); + FILE* file = fopen(labels_file, "rb"); + if(file == NULL){ + printf("Data file %s is not found. Aborting...\n", labels_file); + abort(); + } + + fseek(file, file_header_size, SEEK_CUR); // Skipping the file header + size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file); + printf("--labels bytes_read = %d \n", bytes_read); + return labels; +} + + + +void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){ + + struct Tensor* result = (struct Tensor*) result_ptr; + + uint8_t* labels = readLabels(labels_file, num_labels); + size_t batch_dim = result->dims.dim_sizes[0]; + size_t channels = result->dims.dim_sizes[1]; + float* data = (float*) result->host_data; + int num_errors = 0; + + for(int i = 0; i < batch_dim; i++){ + int chosen = 0; + for (int id = 1; id < 10; ++id){ + if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; + } + + //printf("chosen = %d, label = %d \n", chosen, labels[i]); + if(chosen != labels[i]) + num_errors++; + } + + float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; + printf("****** Accuracy = %f \n\n", accuracy); + + + FILE* fp = 
fopen("final_accuracy", "w+"); + if(fp != NULL){ + + std::ostringstream ss; + ss << std::fixed << accuracy; + std::string print_str = ss.str(); + + fwrite(print_str.c_str(), 1, print_str.length(), fp); + fclose(fp); + } + +} + + + + +void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr){ + + struct Tensor* result = (struct Tensor*) result_ptr; + + //uint8_t* labels = readLabels(labels_file, num_labels); + size_t batch_dim = result->dims.dim_sizes[0]; + size_t channels = result->dims.dim_sizes[1]; + float* data = (float*) result->host_data; + int num_errors = 0; + + for(int i = 0; i < batch_dim; i++){ + int chosen = 0; + for (int id = 1; id < 10; ++id){ + if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; + } + + //printf("chosen = %d, label = %d \n", chosen, labels[i]); + if(chosen != labels[i]) + num_errors++; + } + + float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; + printf("****** Accuracy = %f \n\n", accuracy); + + + FILE* fp = fopen("final_accuracy", "w+"); + if(fp != NULL){ + + std::ostringstream ss; + ss << std::fixed << accuracy; + std::string print_str = ss.str(); + + fwrite(print_str.c_str(), 1, print_str.length(), fp); + fclose(fp); + } + +} + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc# new file mode 100644 index 0000000000000000000000000000000000000000..ec409aaa42b6bd11981705764bc6af25c0fb9d01 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc# @@ -0,0 +1,156 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +bool Opentuner_run = false; + + +void test_4_Layer_clipped_FC(){ + + int total_runs = 200; + if(Opentuner_run){ + 
total_runs = 1000000; + } + + printf("********* 3-Layer FC with clipped activations and weights ********* \n"); + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin", + float_type, 1, 1, 784, 512); + void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin", + float_type, 1, 512, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin", + float_type, 1, 1, 512, 256); + void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin", + float_type, 1, 256, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin", + float_type, 1, 1, 256, 128); + void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin", + float_type, 1, 128, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin", + float_type, 1, 1, 128, 10); + void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* 
fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + + void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2); + + // Layer-4 + void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights); + + void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); + + void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2); + + void* result = tensorSoftmax(fc4_relu); + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + } + + +} + + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_4_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc# new file mode 100644 index 0000000000000000000000000000000000000000..6da5842ef83bb76b83e36e69536731c3d709fefa --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc# @@ -0,0 +1,146 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + + +void testFCNetworkArchCPU(){ + + printf("********* Fully Connected DNN-1 ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + + 
void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + //dumpWeightsToFile("tensors_out/input_fc.out", input); + //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); + + printTensorDims(input); + printTensorDims(fc1_weights); + + // Start profiling tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmCPU(input, fc1_weights); + printTensorDims(fc1out); + //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); + printTensorDims(fc1_bias_out); + + void* fc1_relu = tensorRelu(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); + printTensorDims(fc1_relu); + + // Layer-2 + void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights); + //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu(fc2_bias_out); + //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); + printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + // stopProfiling + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that comparing the results do not need 
to be part of the HPVM graph +} + + + +/* NOTE: Reference Architecture to use for profiling */ +void testFCNetworkArchGPU(){ + + printf("********* Fully Connected DNN-1 ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + printTensorDims(fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); + printTensorDims(fc1_bias_out); + + void* fc1_relu = tensorRelu(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); + printTensorDims(fc1_relu); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu(fc2_bias_out); + //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); + printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that 
comparing the results do not need to be part of the HPVM graph +} + + + +int main(){ + + // This initializes the runtime - must be called before anything + initializeRuntime(0); + + //testFCNetworkArchCPU(); + testFCNetworkArchGPU(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc# new file mode 100644 index 0000000000000000000000000000000000000000..88ceb9105059aeb2eca8f18029af674ea8b14a10 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc# @@ -0,0 +1,92 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +void test4LayerFC(){ + + printf("********* 4-layer FC Network ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + float_type, + test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/FC_network1/fc1.bin", + float_type, 1, 1, 784, 1000); + void* fc1_bias = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin", + float_type, 1, 1000, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network1/fc2.bin", + float_type, 1, 1, 1000, 500); + void* fc2_bias = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin", + float_type, 1, 500, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/FC_network1/fc3.bin", + float_type, 1, 1, 500, 200); + void* fc3_bias = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin", + float_type, 1, 200, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/FC_network1/fc4.bin", + float_type, 1, 1, 200, 10); + void* fc4_bias = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin", + float_type, 
1, 10, 1, 1); + + //dumpWeightsToFile("tensors_out/input_fc.out", input); + //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); + + // Start Profiling execution times of Tensor operations + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + printTensorDims(fc1out); + //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + printTensorDims(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc_fc1.out", fc1_bias_out); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_bias_out, fc2_weights); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + printTensorDims(fc2_bias_out); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_bias_out, fc3_weights); + printTensorDims(fc3out); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + printTensorDims(fc3_bias_out); + + // Layer-4 + void* fc4out = tensorGemmGPU(fc3_bias_out, fc4_weights); + printTensorDims(fc4out); + + void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); + printTensorDims(fc4_bias_out); + + void* result = tensorSoftmax(fc4_bias_out); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); +} + + + + +int main(){ + + initializeRuntime(0); + + test4LayerFC(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc new file mode 120000 index 0000000000000000000000000000000000000000..b1157c7fa0837118c14ccca2c475020504ef12a8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc @@ -0,0 +1 @@ +hsharif3@tyler.cs.illinois.edu.32106:1541049775 \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc new file mode 100644 index 
0000000000000000000000000000000000000000..c746e5de6116f701df7370f93969d40486e04e90 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc @@ -0,0 +1,203 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for profiling */ +void testCifarNet(){ + + int total_runs = 1; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* CIFAR-10 DNN ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 1000; + + uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); + + void* input = readTrainedWeights("../model_params/cifar_keras/input.bin", + float_type, + test_batch_size, 3, 32, 32); + + void* conv1_filter = readTrainedWeights("../model_params/cifar_keras/conv1.bin", + float_type, 32, 3, 3, 3); + void* conv1_bias = readTrainedWeights("../model_params/cifar_keras/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/cifar_keras/conv2.bin", + float_type, 64, 32, 3, 3); + void* conv2_bias = readTrainedWeights("../model_params/cifar_keras/conv2_bias.bin", + float_type, 1, 64, 1, 1); + + void* conv3_filter = readTrainedWeights("../model_params/cifar_keras/conv3.bin", + float_type, 128, 64, 3, 3); + void* conv3_bias = readTrainedWeights("../model_params/cifar_keras/conv3_bias.bin", + float_type, 1, 128, 1, 1); + void* conv4_filter = readTrainedWeights("../model_params/cifar_keras/conv4.bin", + float_type, 128, 128, 3, 3); + void* conv4_bias = readTrainedWeights("../model_params/cifar_keras/conv4_bias.bin", + float_type, 1, 128, 1, 1); + + + void* fc1_weights = readTrainedWeights("../model_params/cifar_keras/fc1.bin", + float_type, 1, 1, 
2048, 1024); + void* fc1_bias = readTrainedWeights("../model_params/cifar_keras/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/cifar_keras/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/cifar_keras/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* conv1_tanh = tensorTanh(conv1out); + + + // 2nd Layer + void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* conv2_tanh = tensorTanh(conv2out); + + void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); + + + // 3rd Layer + void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv3out, conv3_bias); // NOTE: In place operation + + void* conv3_tanh = tensorTanh(conv3out); + + void* pool3out = tensorPooling(conv3_tanh, 0, 2, 2, 0, 0, 2, 2); + + + // 4th Layer + void* conv4out = tensorConvolution(pool3out, conv4_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv4out, 
conv4_bias); // NOTE: In place operation + + void* conv4_tanh = tensorTanh(conv4out); + + void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); + + + printTensorDims(pool4out); + + + void* gemm1out = tensorGemmGPU(pool4out, fc1_weights); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + + void* tanh1out = tensorTanh(gemm1biasout); + + void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + + void* tanh2out = tensorTanh(gemm2_biasout); + + void* result = tensorSoftmax(tanh2out); + + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(0); + + testCifarNet(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc new file mode 100644 index 0000000000000000000000000000000000000000..de19b94b55e878be4f5edf133416b079fab4b8bd --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc @@ -0,0 +1,132 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +bool Opentuner_run = false; + +void test_2_Layer_clipped_FC(){ + + int total_runs = 10; + + if(Opentuner_run){ + 
total_runs = 1000000; + } + + + printf("********* 2-Layer FC with clipped activations and weights ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); + + void* result = tensorSoftmax(fc2_relu); + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + 
int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + } + +} + + +// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_2_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc new file mode 100644 index 0000000000000000000000000000000000000000..a66c4b3ec3ab8380709537d57511a4643adc757a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc @@ -0,0 +1,151 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + + + +bool Opentuner_run = false; + + +void test_3_Layer_clipped_FC(){ + + + int total_runs = 10000; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* 3-Layer FC with clipped activations and weights ********* \n"); + + int test_batch_size = 5000; + + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin", + float_type, 1, 1, 784, 256); + void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin", + float_type, 1, 256, 1, 1); + void* fc2_weights = 
readTrainedWeights("../model_params/fc3_clipped/fc2.bin", + float_type, 1, 1, 256, 128); + void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin", + float_type, 1, 128, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin", + float_type, 1, 1, 128, 10); + void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin", + float_type, 1, 10, 1, 1); + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + // Start execution profiling Tensor ops + startProfiling(); + + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + + void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2); + + void* result = tensorSoftmax(fc3_relu); + + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + +} + + + +int main(int argc, char* argv[]){ + + 
if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_3_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc new file mode 100644 index 0000000000000000000000000000000000000000..ec409aaa42b6bd11981705764bc6af25c0fb9d01 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc @@ -0,0 +1,156 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +bool Opentuner_run = false; + + +void test_4_Layer_clipped_FC(){ + + int total_runs = 200; + if(Opentuner_run){ + total_runs = 1000000; + } + + printf("********* 3-Layer FC with clipped activations and weights ********* \n"); + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin", + float_type, 1, 1, 784, 512); + void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin", + float_type, 1, 512, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin", + float_type, 1, 1, 512, 256); + void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin", + float_type, 1, 256, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin", + float_type, 1, 1, 256, 128); + void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin", + 
float_type, 1, 128, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin", + float_type, 1, 1, 128, 10); + void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + + void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2); + + // Layer-4 + void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights); + + void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); + + void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2); + + void* result = tensorSoftmax(fc4_relu); + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + } + + +} + + + +int main(int argc, char* argv[]){ + + 
if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_4_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..44c03aab875a6de4af6c87776241295cd1fd673b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc @@ -0,0 +1,137 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" +#include "../../include/types.h" + + +bool Opentuner_run = false; + + +void test_2_Layer_clipped_FC(){ + + int total_runs = 1; + + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* 2-Layer FC with clipped activations and weights ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + 
int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorHalfGemm(input, fc1_weights); + + void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); + + void* result = tensorSoftmax(fc2_relu); + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + +} + + +// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_2_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..697fea9b8aa61a8c3cf5ec3e8d0d66466df9b1e8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc @@ -0,0 +1,151 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include 
<unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" +#include "../../include/types.h" + + + + +bool Opentuner_run = false; + + +void test_3_Layer_clipped_FC(){ + + + int total_runs = 1000; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* 3-Layer FC with clipped activations and weights ********* \n"); + + int test_batch_size = 5000; + + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin", + float_type, 1, 1, 784, 256); + void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin", + float_type, 1, 256, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin", + float_type, 1, 1, 256, 128); + void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin", + float_type, 1, 128, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin", + float_type, 1, 1, 128, 10); + void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin", + float_type, 1, 10, 1, 1); + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + // Start execution profiling Tensor ops + startProfiling(); + + + // Layer-1 + void* fc1out = tensorHalfGemm(input, 
fc1_weights); + + void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); + + // Layer-2 + void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); + + // Layer-3 + void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights); + + void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias); + + void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2); + + void* result = tensorSoftmax(fc3_relu); + + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + +} + + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_3_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..ad999165cfd4148479de58e24fed8291161da491 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc @@ -0,0 +1,156 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" +#include "../../include/types.h" + + +bool Opentuner_run = false; + + +void test_4_Layer_clipped_FC(){ + + int total_runs = 1; + if(Opentuner_run){ 
+ total_runs = 1000000; + } + + printf("********* 3-Layer FC with clipped activations and weights ********* \n"); + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin", + float_type, 1, 1, 784, 512); + void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin", + float_type, 1, 512, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin", + float_type, 1, 1, 512, 256); + void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin", + float_type, 1, 256, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin", + float_type, 1, 1, 256, 128); + void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin", + float_type, 1, 128, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin", + float_type, 1, 1, 128, 10); + void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorHalfGemm(input, fc1_weights); + + void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); + + void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); + + // Layer-2 
+ void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights); + + void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); + + void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); + + // Layer-3 + void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights); + + void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias); + + void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2); + + // Layer-4 + void* fc4out = tensorHalfGemm(fc3_relu, fc4_weights); + + void* fc4_bias_out = tensorHalfAdd(fc4out, fc4_bias); + + void* fc4_relu = tensorHalfRelu2(fc4_bias_out, 0, 2); + + void* result = tensorSoftmax(fc4_relu); + + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + } + + +} + + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_4_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..dd68f2b48eb66456061bb93decc1cbd985887be0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc @@ -0,0 +1,171 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for 
profiling */ +void testLenetTanh(){ + + int total_runs = 1; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 
80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + + void* conv1_tanh = tensorHalfTanh(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + + void* conv2_tanh = tensorHalfTanh(pool2out); + + void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights); + + void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias); + + void* tanh1out = tensorHalfTanh(gemm1biasout); + + void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights); + + void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias); + + void* tanh2out = tensorHalfTanh(gemm2_biasout); + + void* result = tensorSoftmax(tanh2out); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + 
+ + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(0); + + testLenetTanh(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..bb45b14d62e061e704b252aa44e602e0c1d08ba7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc @@ -0,0 +1,173 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" +#include "../../include/types.h" + + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenetTanh(){ + + int total_runs = 1; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = 
readTrainedWeights("../model_params/lenet_tanh2/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + + void* conv1_tanh = tensorHalfTanh(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + + void* conv2_tanh = tensorHalfTanh(pool2out); + + void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights); + + void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias); + + void* tanh1out = tensorHalfTanh(gemm1biasout); + + void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights); + + void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias); + + void* tanh2out = tensorHalfTanh(gemm2_biasout); + + void* result = tensorSoftmax(tanh2out); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(0); + + testLenetTanh(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc new file mode 
100644 index 0000000000000000000000000000000000000000..d2d663552fdab6366f28655ca835ba63cb4fcee4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc @@ -0,0 +1,171 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenetTanh(){ + + int total_runs = 1; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + 
void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + + void* conv1_tanh = tensorTanh(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + + void* conv2_tanh = tensorTanh(pool2out); + + void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + + void* tanh1out = tensorTanh(gemm1biasout); + + void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + + void* tanh2out = tensorTanh(gemm2_biasout); + + void* result = tensorSoftmax(tanh2out); + + // 
End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(0); + + testLenetTanh(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6854f4563e34b375991b131b04c77e6ba83f577 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc @@ -0,0 +1,171 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenetTanh(){ + + int total_runs = 1; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); + + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels 
(MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + + void* conv1_tanh = tensorTanh(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + + void* conv2_tanh = tensorTanh(pool2out); + + void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + + void* tanh1out = tensorTanh(gemm1biasout); + + void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + + void* tanh2out = tensorTanh(gemm2_biasout); + + void* result = tensorSoftmax(tanh2out); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(0); + + testLenetTanh(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc new file mode 100644 index 
0000000000000000000000000000000000000000..d7addd7283e24bedfc32d57d84c4ce17d9966f57 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc @@ -0,0 +1,80 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" +#include "../../include/types.h" + + + +void test_2_Layer_clipped_FC(){ + + printf("********* 2-Layer FC with clipped activations and weights ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + printTensorDims(fc1out); + dumpWeightsToFile("tensors_out2/fc1out.out", fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + printTensorDims(fc1_bias_out); + dumpWeightsToFile("tensors_out2/fc1_bias.out", fc1_bias_out); + + void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); + printTensorDims(fc1_relu); + dumpWeightsToFile("tensors_out2/fc1_clipped_relu.out", fc1_relu); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + printTensorDims(fc2out); + dumpWeightsToFile("tensors_out2/fc2out.out", fc2out); + + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); 
+ printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); +} + + + +int main(){ + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + test_2_Layer_clipped_FC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc new file mode 100644 index 0000000000000000000000000000000000000000..d5211be3918adcd030fc40c13cba1ff0d7c53c18 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc @@ -0,0 +1,112 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenet2Arch(){ + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = 
readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + printTensorDims(conv1out); + + void* conv1_reluout = tensorRelu(conv1out); + //dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout); + + void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + //dumpWeightsToFile("tensors_out/pool1.out", pool1out); + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + printTensorDims(conv2out); + + void* conv2_reluout = tensorRelu(conv2out); + //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout); + + void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); + + void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); + printTensorDims(gemm1out); + 
//dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + printTensorDims(gemm1biasout); + + void* relu1out = tensorRelu(gemm1biasout); + printTensorDims(relu1out); + + void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + printTensorDims(gemm2_biasout); + + void* result = tensorSoftmax(gemm2_biasout); + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", + test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + testLenet2Arch(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc new file mode 100644 index 0000000000000000000000000000000000000000..358cb6a75b8e63ca0a0bd964c9f73f2d16c39b4f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc @@ -0,0 +1,113 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenet2Arch(){ + + printf("********* Lenet-2 Architecture ********** \n"); + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter 
= readTrainedWeights("../model_params/lenet2_params/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + dumpWeightsToFile("tensors_out/conv1_out.out", conv1out); + + tensorAdd(conv1out, conv1_bias); // NOTE: In-place operation + printTensorDims(conv1out); + + dumpWeightsToFile("tensors_out/conv1_bias_add.out", conv1out); + + void* conv1_reluout = tensorRelu(conv1out); + dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout); + + void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + dumpWeightsToFile("tensors_out/conv1_pool.out", pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + printTensorDims(conv2out); + + void* conv2_reluout = tensorRelu(conv2out); + //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout); + + void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); + + void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); + printTensorDims(gemm1out); + //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + printTensorDims(gemm1biasout); + + void* relu1out = tensorRelu(gemm1biasout); + printTensorDims(relu1out); + + void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + printTensorDims(gemm2_biasout); + + void* result = tensorSoftmax(gemm2_biasout); + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", + test_batch_size, result); +} + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + 
testLenet2Arch(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc new file mode 100644 index 0000000000000000000000000000000000000000..42e364289e499d92591692a04e42988fd1a66dc5 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc @@ -0,0 +1,109 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../../tensor_runtime/include/tensor_runtime.h" +#include "../../include/utils.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenet2Arch(){ + + printf("********* Lenet-2 Architecture ********** \n"); + + int test_batch_size = 1000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_test_params/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_test_params/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_test_params/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_test_params/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_test_params/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet_test_params/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet_test_params/fc2.bin", + float_type, 1, 1, 1024, 10); + void* 
fc2_bias = readTrainedWeights("../model_params/lenet_test_params/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + printTensorDims(conv1out); + + void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + //dumpWeightsToFile("tensors_out/pool1.out", pool1out); + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + printTensorDims(conv2out); + + //void* conv2_reluout = tensorRelu(conv2out); + + void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); + + void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); + printTensorDims(gemm1out); + //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + printTensorDims(gemm1biasout); + + void* relu1out = tensorRelu(gemm1biasout); + printTensorDims(relu1out); + + void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + printTensorDims(gemm2_biasout); + + void* result = tensorSoftmax(gemm2_biasout); + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", + test_batch_size, result); + 
// THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + testLenet2Arch(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc new file mode 100644 index 0000000000000000000000000000000000000000..a8129a1e459a15e26f595972724451e01d81b0a1 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc @@ -0,0 +1,480 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem + +/*void testAlexnet2(){ + + struct Tensor* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 100, 3, 224, 224); + initTensorValues(input); + + struct Tensor* conv1filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 96, 3, 11, 11); + printTensorDims(conv1filter); + + /****** Start of Layer 1 *************** + + // NOTE: Padding for same conv is computed as P = (F - 1 /2) + struct Tensor* conv1out = tensorConvolution(input, conv1filter, 5, 5, 4, 4); + printTensorDims(conv1out); + + struct Tensor* conv1bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 96, 1, 1); + struct Tensor* conv1bias_out = tensorAdd(conv1out, conv1bias); + + struct Tensor* relu1out = tensorRelu(conv1bias_out); + + // NOTE: These parameters are a deviation from the original paper + // The parameters match the alexnet TF model + // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal + unsigned int LRN_window = 5; + double LRN_alpha = 2e-05; + double LRN_beta = 0.75; + double LRN_k = 1.0; + struct Tensor* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn1out); + + struct Tensor* maxpool1out = 
tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2); + + /****** End of Conv Layer 1 ********** + + struct Tensor** splits = tensorSplit(maxpool1out, 2, 1); + + struct Tensor* conv2W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 48, 5, 5); + struct Tensor** conv2fils = tensorSplit(conv2W, 2, 0); + + struct Tensor* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1); + printTensorDims(conv2a_out); + + struct Tensor* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1); + printTensorDims(conv2b_out); + + struct Tensor* conv2_outs[2]; + conv2_outs[0] = conv2a_out; + conv2_outs[1] = conv2b_out; + + struct Tensor* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); + printTensorDims(conv2_concat_out); + + struct Tensor* conv2bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1); + struct Tensor* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); + struct Tensor* relu2out = tensorRelu(conv2bias_out); + struct Tensor* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn2out); + + struct Tensor* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2); + printTensorDims(maxpool2out); + + /******** End of Conv Layer 2 ************ + + struct Tensor* conv3filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 256, 3, 3); + struct Tensor* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1); + + struct Tensor* conv3bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1); + struct Tensor* conv3bias_out = tensorAdd(conv3_out, conv3bias); + struct Tensor* relu3out = tensorRelu(conv3bias_out); + printTensorDims(relu3out); + + /********* End of Conv layer 3 ****** + + struct Tensor** splits2 = tensorSplit(relu3out, 2, 1); + + struct Tensor* conv4W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 192, 3, 3); + struct Tensor** conv4fils = tensorSplit(conv4W, 2, 0); + + printTensorDims(splits2[0]); + printTensorDims(conv4fils[0]); + 
+ struct Tensor* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1); + printTensorDims(conv4a_out); + + struct Tensor* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1); + printTensorDims(conv4b_out); + + struct Tensor* conv4_outs[2]; + conv4_outs[0] = conv4a_out; + conv4_outs[1] = conv4b_out; + + struct Tensor* conv4_concat_out = tensorConcat(conv4_outs, 2, 1); + printTensorDims(conv4_concat_out); + + struct Tensor* conv4bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1); + struct Tensor* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); + struct Tensor* relu4out = tensorRelu(conv4bias_out); + printTensorDims(relu4out); + + /********* End of Conv layer 4 ****** + + struct Tensor** splits3 = tensorSplit(relu4out, 2, 1); + + struct Tensor* conv5W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 192, 3, 3); + struct Tensor** conv5fils = tensorSplit(conv5W, 2, 0); + + printTensorDims(splits3[0]); + printTensorDims(conv5fils[0]); + + struct Tensor* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1); + printTensorDims(conv5a_out); + + struct Tensor* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1); + printTensorDims(conv5b_out); + + struct Tensor* conv5_outs[2]; + conv5_outs[0] = conv5a_out; + conv5_outs[1] = conv5b_out; + + struct Tensor* conv5_concat_out = tensorConcat(conv5_outs, 2, 1); + printTensorDims(conv5_concat_out); + + struct Tensor* conv5bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1); + struct Tensor* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); + struct Tensor* relu5out = tensorRelu(conv5bias_out); + printTensorDims(relu5out); + + struct Tensor* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2); + printTensorDims(maxpool5out); + + /********* End of Conv layer 5 ****** + + struct Tensor* fc1_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 256*6*6, 4096); + struct Tensor* gemm1out 
= tensorGemm(maxpool5out, fc1_weights); + printTensorDims(gemm1out); + + struct Tensor* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 1, 4096); + struct Tensor* gemm1biasout = tensorGemmBias(gemm1out, bias); + printTensorDims(gemm1biasout); + + struct Tensor* relu6out = tensorRelu(gemm1biasout); + printTensorDims(relu6out); + + /***** End of FC1 layer ******** + + struct Tensor* fc2_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 4096, 4096); + struct Tensor* gemm2out = tensorGemm(relu6out, fc2_weights); + printTensorDims(gemm2out); + + struct Tensor* bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 1, 4096); + struct Tensor* gemm2biasout = tensorGemmBias(gemm2out, bias2); + printTensorDims(gemm2biasout); + + struct Tensor* relu7out = tensorRelu(gemm2biasout); + printTensorDims(relu7out); + + /***** End of FC2 layer ******** + + struct Tensor* fc3_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 4096, 1000); + struct Tensor* gemm3out = tensorGemm(relu7out, fc3_weights); + printTensorDims(gemm3out); + + struct Tensor* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 1, 1, 1, 1000); + struct Tensor* gemm3biasout = tensorGemmBias(gemm3out, bias3); + printTensorDims(gemm3biasout); + + /******** End of FC3 Layer ********** + struct Tensor* result = tensorSoftmax(gemm3biasout); + printTensorDims(result); + +} */ + + + +void printLikelihood(char* labels_file, int num_labels, void* result_ptr){ + + struct Tensor* result = (struct Tensor*) result_ptr; + + size_t batch_dim = result->dims.dim_sizes[0]; + size_t channels = result->dims.dim_sizes[1]; + float* data = (float*) result->host_data; + + for(int i = 0; i < batch_dim; i++){ + int chosen = 0; + for (int id = 1; id < channels; ++id){ + if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; + } + + printf("** chosen = %d, label = %f, label+3 = %f \n", + chosen, data[chosen], data[chosen+3]); + } + + //float 
accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; + //printf("****** Accuracy = %f \n\n", accuracy); +} + + +//--- Results not matching +// *** CHECK: +// 1) cudnnCrossCorrelation vs cudnnConvolution +// 2) Weights +// 3) Tensor outputs +// 4) Data layouts + + + +/*** NOTE: REFERECNCE ARCHITECTURE **/ +// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem +void testAlexnet3(){ + + int test_batch_size = 2; + int conv_mode = 1; // CROSS_CORRELATION matches the TF conv2d implementation + int conv_precision = 0; // floating point precision for convolution + + printf("****** AlexNet Architecture 3 ******** \n\n"); + void* input = readTrainedWeights("../alexnet/params/combined_imgs.bin", + CUDNN_DATA_FLOAT, test_batch_size, 3, 227, 227); + dumpWeightsToFile("tensors_out/input.out", input); + + /****** Start of Layer 1 ****************/ + void* conv1filter = readTrainedWeights("../alexnet/params/conv1.bin", + CUDNN_DATA_FLOAT, 96, 3, 11, 11); + + printTensorDims(conv1filter); + dumpWeightsToFile("tensors_out/conv1filter.out", conv1filter); + + // NOTE: the trained model does NOT have any padding in this conv + void* conv1out = tensorConvolution(input, conv1filter, 4, 4, 4, 4, + conv_mode, conv_precision); + printTensorDims(conv1out); + + void* conv1bias = readTrainedWeights("../alexnet/params/conv1.bias.bin", + CUDNN_DATA_FLOAT, 1, 96, 1, 1); + void* conv1bias_out = tensorAdd(conv1out, conv1bias); + + dumpWeightsToFile("tensors_out/conv1_init.out", conv1out); + + void* relu1out = tensorRelu(conv1bias_out); + printTensorDims(relu1out); + dumpWeightsToFile("tensors_out/conv1.out", relu1out); + + // NOTE: These parameters are a deviation from the original paper + // The parameters match the alexnet TF model + // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal + unsigned int LRN_window = 5; + double LRN_alpha = 2e-05 * LRN_window; + double LRN_beta = 0.75; + double LRN_k = 1.0; + + // 
TEST-point - Compare TF vs CUDNN + void* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn1out); + dumpWeightsToFile("tensors_out/lrn1.out", lrn1out); + + void* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2); + printTensorDims(maxpool1out); + dumpWeightsToFile("tensors_out/maxpool1.out", maxpool1out); + + /****** End of Conv Layer 1 ***********/ + + // TEST-point + void** splits = tensorSplit(maxpool1out, 2, 1); + + void* concat_test1 = tensorConcat(splits, 2, 1); + compareTensors(maxpool1out, concat_test1); + + void* conv2W = readTrainedWeights("../alexnet/params/conv2.bin", + CUDNN_DATA_FLOAT, 256, 48, 5, 5); + + dumpWeightsToFile("tensors_out/conv2filter.out", conv2W); + + // TEST point - compare split convolution across TF vs cuDNN + void** conv2fils = tensorSplit(conv2W, 2, 0); + + void* concat_test2 = tensorConcat(conv2fils, 2, 0); + compareTensors(conv2W, concat_test2); + + // NOTE: Padding for same conv is computed as P = ((F - 1) / 2) + void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv2a_out); + + void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv2b_out); + + void* conv2_outs[2]; + conv2_outs[0] = conv2a_out; + conv2_outs[1] = conv2b_out; + + // Test point + void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); + printTensorDims(conv2_concat_out); + dumpWeightsToFile("tensors_out/conv2_init.out", conv2_concat_out); + + void* conv2bias = readTrainedWeights("../alexnet/params/conv2.bias.bin", + CUDNN_DATA_FLOAT, 1, 256, 1, 1); + void* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); + printTensorDims(conv2bias_out); + + dumpWeightsToFile("tensors_out/conv2_bias_init.out", conv2bias_out); + + void* relu2out = tensorRelu(conv2bias_out); + dumpWeightsToFile("tensors_out/conv2.out", relu2out); + printTensorDims(relu2out); + + void* lrn2out = 
tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn2out); + + void* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2); + printTensorDims(maxpool2out); + + /******** End of Conv Layer 2 *************/ + + void* conv3filter = readTrainedWeights("../alexnet/params/conv3.bin", + CUDNN_DATA_FLOAT, 384, 256, 3, 3); + void* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1, + conv_mode, conv_precision); + + void* conv3bias = readTrainedWeights("../alexnet/params/conv3.bias.bin", + CUDNN_DATA_FLOAT, 1, 384, 1, 1); + void* conv3bias_out = tensorAdd(conv3_out, conv3bias); + void* relu3out = tensorRelu(conv3bias_out); + dumpWeightsToFile("tensors_out/conv3.out", relu3out); + printTensorDims(relu3out); + + /********* End of Conv layer 3 *******/ + + void** splits2 = tensorSplit(relu3out, 2, 1); + + void* conv4W = readTrainedWeights("../alexnet/params/conv4.bin", + CUDNN_DATA_FLOAT, 384, 192, 3, 3); + void** conv4fils = tensorSplit(conv4W, 2, 0); + + printTensorDims(splits2[0]); + printTensorDims(conv4fils[0]); + + // Test-point DOES the pairing of splits and filters make sense? 
+ void* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv4a_out); + + void* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv4b_out); + + void* conv4_outs[2]; + conv4_outs[0] = conv4a_out; + conv4_outs[1] = conv4b_out; + + void* conv4_concat_out = tensorConcat(conv4_outs, 2, 1); + printTensorDims(conv4_concat_out); + + void* conv4bias = readTrainedWeights("../alexnet/params/conv4.bias.bin", + CUDNN_DATA_FLOAT, 1, 384, 1, 1); + void* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); + + void* relu4out = tensorRelu(conv4bias_out); + printTensorDims(relu4out); + + /********* End of Conv layer 4 *******/ + + void** splits3 = tensorSplit(relu4out, 2, 1); + + void* conv5W = readTrainedWeights("../alexnet/params/conv5.bin", + CUDNN_DATA_FLOAT, 256, 192, 3, 3); + void** conv5fils = tensorSplit(conv5W, 2, 0); + + printTensorDims(splits3[0]); + printTensorDims(conv5fils[0]); + + void* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv5a_out); + + void* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv5b_out); + + void* conv5_outs[2]; + conv5_outs[0] = conv5a_out; + conv5_outs[1] = conv5b_out; + + void* conv5_concat_out = tensorConcat(conv5_outs, 2, 1); + printTensorDims(conv5_concat_out); + + void* conv5bias = readTrainedWeights("../alexnet/params/conv5.bias.bin", + CUDNN_DATA_FLOAT, 1, 256, 1, 1); + void* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); + void* relu5out = tensorRelu(conv5bias_out); + printTensorDims(relu5out); + + void* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2); + printTensorDims(maxpool5out); + + /********* End of Conv layer 5 *******/ + + // Test-point: I suspect the data may not be layed out correct (either in file or after loading) + void* 
fc1_weights = readTrainedWeights("../alexnet/params/fc1.bin", + CUDNN_DATA_FLOAT, 1, 1, 256*6*6, 4096); + void* gemm1out = tensorGemm(maxpool5out, fc1_weights); + printTensorDims(gemm1out); + + void* bias = readTrainedWeights("../alexnet/params/fc1.bias.bin", + CUDNN_DATA_FLOAT, 1, 1, 1, 4096); + + void* gemm1biasout = tensorGemmBias(gemm1out, bias); + printTensorDims(gemm1biasout); + + void* relu6out = tensorRelu(gemm1biasout); + printTensorDims(relu6out); + + /***** End of FC1 layer *********/ + + void* fc2_weights = readTrainedWeights("../alexnet/params/fc2.bin", + CUDNN_DATA_FLOAT, 1, 1, 4096, 4096); + void* gemm2out = tensorGemm(relu6out, fc2_weights); + printTensorDims(gemm2out); + + void* bias2 = readTrainedWeights("../alexnet/params/fc2.bias.bin", + CUDNN_DATA_FLOAT, 1, 1, 1, 4096); + void* gemm2biasout = tensorGemmBias(gemm2out, bias2); + printTensorDims(gemm2biasout); + + void* relu7out = tensorRelu(gemm2biasout); + printTensorDims(relu7out); + + /***** End of FC2 layer *********/ + + void* fc3_weights = readTrainedWeights("../alexnet/params/fc3.bin", + CUDNN_DATA_FLOAT, 1, 1, 4096, 1000); + void* gemm3out = tensorGemm(relu7out, fc3_weights); + printTensorDims(gemm3out); + + void* bias3 = readTrainedWeights("../alexnet/params/fc3.bias.bin", + CUDNN_DATA_FLOAT, 1, 1, 1, 1000); + void* gemm3biasout = tensorGemmBias(gemm3out, bias3); + printTensorDims(gemm3biasout); + + /******** End of FC3 Layer ***********/ + void* result = tensorSoftmax(gemm3biasout); + printTensorDims(result); + + // FIXIT: Pass file with the labels + printLikelihood("", test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph + printf("END of Alexnet3 -- \n"); +} + + + + + +int main(){ + + // IMP-NOTE: Always initialize the runtime + initializeRuntime(0); + + //testAlexnet1(); + //testAlexnet2(); + testAlexnet3(); + + return 0; +} diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc 
b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..c0fee9b659db9ff45f56b75b989fbbed68523d43 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc @@ -0,0 +1,74 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testFC_half(){ + + printf("********* Fully Connected DNN-1 ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorHgemm(input, fc1_weights); + printTensorDims(fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + printTensorDims(fc1_bias_out); + + void* fc1_relu = tensorRelu(fc1_bias_out); + printTensorDims(fc1_relu); + + // Layer-2 + void* fc2out = tensorHgemm(fc1_relu, fc2_weights); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu(fc2_bias_out); + printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + stopProfiling(); + + 
computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", + test_batch_size, result); +} + + + +int main(){ + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + testFC_half(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc new file mode 100644 index 0000000000000000000000000000000000000000..e8b70146a10359bf2df7420ae388325e6a658557 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc @@ -0,0 +1,152 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + + +void testFCNetworkArchCPU(){ + + printf("********* Fully Connected DNN-1 ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + //dumpWeightsToFile("tensors_out/input_fc.out", input); + //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); + + printTensorDims(input); + printTensorDims(fc1_weights); + + // Start profiling tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmCPU(input, fc1_weights); + printTensorDims(fc1out); + //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); + + void* fc1_bias_out = 
tensorAdd(fc1out, fc1_bias); + //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); + printTensorDims(fc1_bias_out); + + void* fc1_relu = tensorRelu(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); + printTensorDims(fc1_relu); + + // Layer-2 + void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights); + //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu(fc2_bias_out); + //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); + printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + // stopProfiling + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + + +/* NOTE: Reference Architecture to use for profiling */ +void testFCNetworkArchGPU(){ + + printf("********* Fully Connected DNN-1 ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + 
printTensorDims(fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); + printTensorDims(fc1_bias_out); + + void* fc1_relu = tensorRelu(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); + printTensorDims(fc1_relu); + + // IMPORTANT: Adding errors to the FC1 layer output + //tensorAddError(fc1_relu, 3); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); + printTensorDims(fc2_bias_out); + + void* fc2_relu = tensorRelu(fc2_bias_out); + //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); + printTensorDims(fc2_relu); + + void* result = tensorSoftmax(fc2_relu); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + + +int main(){ + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + //testFCNetworkArchCPU(); + + testFCNetworkArchGPU(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc new file mode 100644 index 0000000000000000000000000000000000000000..fc00532a1b3712fab9d098a9a8e1a1586f1458a5 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc @@ -0,0 +1,94 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +void test4LayerFC(){ + + printf("********* 4-layer FC Network 
********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + float_type, + test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/FC_network1/fc1.bin", + float_type, 1, 1, 784, 1000); + void* fc1_bias = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin", + float_type, 1, 1000, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network1/fc2.bin", + float_type, 1, 1, 1000, 500); + void* fc2_bias = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin", + float_type, 1, 500, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/FC_network1/fc3.bin", + float_type, 1, 1, 500, 200); + void* fc3_bias = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin", + float_type, 1, 200, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/FC_network1/fc4.bin", + float_type, 1, 1, 200, 10); + void* fc4_bias = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin", + float_type, 1, 10, 1, 1); + + //dumpWeightsToFile("tensors_out/input_fc.out", input); + //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); + + // Start Profiling execution times of Tensor operations + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + printTensorDims(fc1out); + //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + printTensorDims(fc1_bias_out); + //dumpWeightsToFile("tensors_out/fc_fc1.out", fc1_bias_out); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_bias_out, fc2_weights); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + printTensorDims(fc2_bias_out); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_bias_out, fc3_weights); + printTensorDims(fc3out); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + 
printTensorDims(fc3_bias_out); + + // Layer-4 + void* fc4out = tensorGemmGPU(fc3_bias_out, fc4_weights); + printTensorDims(fc4out); + + void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); + printTensorDims(fc4_bias_out); + + void* result = tensorSoftmax(fc4_bias_out); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); +} + + + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + test4LayerFC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc new file mode 100644 index 0000000000000000000000000000000000000000..531bb01695cddb70de0f9bea90f6b229679e9bce --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc @@ -0,0 +1,93 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" + + +void test4LayerFC(){ + + printf("********* 4-layer FC Network ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + float_type, + test_batch_size, 1, 28, 28); + void* fc1_weights = readTrainedWeights("../model_params/FC_network3/fc1.bin", + float_type, 1, 1, 784, 512); + void* fc1_bias = readTrainedWeights("../model_params/FC_network3/fc1_bias.bin", + float_type, 1, 512, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network3/fc2.bin", + float_type, 1, 1, 512, 256); + void* fc2_bias = readTrainedWeights("../model_params/FC_network3/fc2_bias.bin", + float_type, 1, 256, 1, 1); + void* fc3_weights = readTrainedWeights("../model_params/FC_network3/fc3.bin", + float_type, 1, 1, 256, 128); + void* fc3_bias = 
readTrainedWeights("../model_params/FC_network3/fc3_bias.bin", + float_type, 1, 128, 1, 1); + void* fc4_weights = readTrainedWeights("../model_params/FC_network3/fc4.bin", + float_type, 1, 1, 128, 10); + void* fc4_bias = readTrainedWeights("../model_params/FC_network3/fc4_bias.bin", + float_type, 1, 10, 1, 1); + + // Start Profiling execution times of Tensor operations + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + printTensorDims(fc1out); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + printTensorDims(fc1_bias_out); + void* fc1_relu = tensorRelu(fc1_bias_out); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + printTensorDims(fc2out); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + printTensorDims(fc2_bias_out); + void* fc2_relu = tensorRelu(fc2_bias_out); + + // Layer-3 + void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); + printTensorDims(fc3out); + + void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); + printTensorDims(fc3_bias_out); + void* fc3_relu = tensorRelu(fc3_bias_out); + + // Layer-4 + void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights); + printTensorDims(fc4out); + + void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); + printTensorDims(fc4_bias_out); + void* fc4_relu = tensorRelu(fc4_bias_out); + + void* result = tensorSoftmax(fc4_relu); + printTensorDims(result); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); +} + + + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + test4LayerFC(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc new file mode 100644 index 0000000000000000000000000000000000000000..e21b09fbf59c6ceee2adcf6df798ef04351a03ef --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc @@ -0,0 +1,178 @@ + +#include <stdio.h> 
+#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenetArch2(){ + + printf("********* Lenet Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin", + CUDNN_DATA_FLOAT, 20, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin", + CUDNN_DATA_FLOAT, 1, 20, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin", + CUDNN_DATA_FLOAT, 50, 20, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin", + CUDNN_DATA_FLOAT, 1, 50, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin", + CUDNN_DATA_FLOAT, 1, 1, 800, 500); + void* fc1_bias = readTrainedWeights("../model_params/lenet_params/ip1.bias.bin", + CUDNN_DATA_FLOAT, 1, 1, 1, 500); + void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin", + CUDNN_DATA_FLOAT, 1, 1, 500, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet_params/ip2.bias.bin", + CUDNN_DATA_FLOAT, 1, 1, 1, 10); + + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1, + conv_mode, conv_precision); + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + printTensorDims(conv1out); + + void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + + printTensorDims(conv2out); + + void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + + void* gemm1out = tensorGemm(pool2out, fc1_weights); + printTensorDims(gemm1out); + + void* gemm1biasout = tensorGemmBias(gemm1out, fc1_bias); + printTensorDims(gemm1biasout); + + void* relu1out = tensorRelu(gemm1biasout); + printTensorDims(relu1out); + + void* gemm2out = tensorGemm(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* gemm2_biasout = tensorGemmBias(gemm2out, fc2_bias); + printTensorDims(gemm2_biasout); + + void* result = tensorSoftmax(gemm2_biasout); + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + +/* This architecture REMOVES the bias adds */ +void testLenetArch3(){ + + printf("********* Lenet Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are 
output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin", + CUDNN_DATA_FLOAT, 20, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin", + CUDNN_DATA_FLOAT, 1, 20, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin", + CUDNN_DATA_FLOAT, 50, 20, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin", + CUDNN_DATA_FLOAT, 1, 50, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin", + CUDNN_DATA_FLOAT, 1, 1, 800, 500); + void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin", + CUDNN_DATA_FLOAT, 1, 1, 500, 10); + + /* Convolution specific parameters */ + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1, + conv_mode, conv_precision); + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + printTensorDims(conv1out); + + void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1, + conv_mode, conv_precision); + printTensorDims(conv2out); + + void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + + void* gemm1out = tensorGemm(pool2out, fc1_weights); + printTensorDims(gemm1out); + + void* relu1out = tensorRelu(gemm1out); + printTensorDims(relu1out); + + void* gemm2out = tensorGemm(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* result = tensorSoftmax(gemm2out); + printTensorDims(result); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph + printf("END of Lenet Arch3 -- \n"); +} + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + //testTensorAdd(); + //testTensorConv(); + //testTensorPool(); + //testTensorGemm(); + //testTensorGemmBias(); + //testTensorRelu(); + //testTensorSoftmax(); + + //testLenetArch(); + testLenetArch2(); + //testLenetArch3(); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc new file mode 100644 index 0000000000000000000000000000000000000000..77f256320e158fb13555e83d0fbe260ce9d3a83f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc @@ -0,0 +1,111 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include 
"../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + + +/* NOTE: Reference Architecture to use for profiling */ +void testLenet2Arch(){ + + printf("********* Lenet-2 Architecture ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", + CUDNN_DATA_FLOAT, + test_batch_size, 1, 28, 28); + + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class + void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin", + float_type, 32, 1, 5, 5); + void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin", + float_type, 1, 32, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin", + float_type, 64, 32, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin", + float_type, 1, 64, 1, 1); + void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin", + float_type, 1, 1, 7*7*64, 1024); + void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin", + float_type, 1, 1024, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin", + float_type, 1, 1, 1024, 10); + void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + + // NOTE: For tensorAdd, the only dimension that MUST match is channels + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + printTensorDims(conv1out); + + void* conv1_reluout = tensorRelu(conv1out); + //dumpWeightsToFile("tensors_out/conv1.out", conv1_reluout); + + void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool1out); + // NOTE: input channels have to match between tensor op inputs and outputs + void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + printTensorDims(conv2out); + + void* conv2_reluout = tensorRelu(conv2out); + //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout); + + void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2); + printTensorDims(pool2out); + //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); + + void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); + printTensorDims(gemm1out); + //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + printTensorDims(gemm1biasout); + + void* relu1out = tensorRelu(gemm1biasout); + printTensorDims(relu1out); + + void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); + printTensorDims(gemm2out); + + void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); + printTensorDims(gemm2_biasout); + + void* result = tensorSoftmax(gemm2_biasout); + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", + test_batch_size, result); + // THINK: I believe that comparing the results do not need to be part of the HPVM graph +} + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + testLenet2Arch(); + + return 0; 
+} + diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..ee4cc95d411885a8adb5a7d3ec0ed9646d7b005b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc @@ -0,0 +1,425 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +void testTensorGemm(){ + + printf("***** TensorSgemm ***** \n\n"); + void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1); + struct Tensor* lhs = (struct Tensor*) lhs_ptr; + fillTensorWithOnes(lhs); + + float* data_arr = (float*) lhs->host_data; + for(int i = 0; i < lhs->num_elems; i++){ + data_arr[i] = (i / 4) + 1; + } + + void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3); + fillTensorWithOnes(rhs); + + void* output = tensorGemmCPU(lhs, rhs); + printTensorValues(output); + + void* bias_ptr = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 3, 1, 1); + struct Tensor* bias = (struct Tensor*) bias_ptr; + fillTensorWithOnes(bias); + + float* bias_arr = (float*) bias->host_data; + for(int i = 0; i < bias->num_elems; i++){ + bias_arr[i] = i + 1; + } + + void* output2 = tensorAdd(output, bias); + printTensorValues(output2); +} + + +void testTensorHgemm(){ + + printf("***** TensorHgemm ***** \n\n"); + void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1); + struct Tensor* lhs = (struct Tensor*) lhs_ptr; + fillTensorWithOnes(lhs); + + float* data_arr = (float*) lhs->host_data; + for(int i = 0; i < lhs->num_elems; i++){ + data_arr[i] = (i / 4) + 1; + } + + void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3); + fillTensorWithOnes(rhs); + + void* output = tensorHalfGemm(lhs, rhs); + printTensorValues(output); + + void* bias_ptr = create4DTensor(CUDNN_TENSOR_NCHW, 
CUDNN_DATA_FLOAT, 1, 3, 1, 1); + struct Tensor* bias = (struct Tensor*) bias_ptr; + fillTensorWithOnes(bias); + + float* bias_arr = (float*) bias->host_data; + for(int i = 0; i < bias->num_elems; i++){ + bias_arr[i] = i + 1; + } + + void* output2 = tensorAdd(output, bias); + printTensorValues(output2); +} + + +void testTensorHgemm2(){ + + printf("***** TensorHgemm ***** \n\n"); + void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 10000, 800, 1, 1); + struct Tensor* lhs = (struct Tensor*) lhs_ptr; + + float* data_arr = (float*) lhs->host_data; + for(int i = 0; i < lhs->num_elems; i++){ + data_arr[i] = (i / 4) + 1; + } + + void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, + 1, 1, 800, 800); + fillTensorWithOnes(rhs); + + void* output = tensorHalfGemm(lhs, rhs); + //printTensorValues(output); +} + + +void testTensorSgemm2(){ + + printf("***** TensorSgemm ***** \n\n"); + void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + 10000, 800, 1, 1); + struct Tensor* lhs = (struct Tensor*) lhs_ptr; + + float* data_arr = (float*) lhs->host_data; + for(int i = 0; i < lhs->num_elems; i++){ + data_arr[i] = (i / 4) + 1; + } + + void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, + 1, 1, 800, 800); + fillTensorWithOnes(rhs); + + void* output = tensorGemmGPU(lhs, rhs); + //printTensorValues(output); +} + + + +void testTensorGemmGPU(){ + + printf("***** TensorSgemm ***** \n\n"); + void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1); + struct Tensor* lhs = (struct Tensor*) lhs_ptr; + fillTensorWithOnes(lhs); + + float* data_arr = (float*) lhs->host_data; + for(int i = 0; i < lhs->num_elems; i++){ + data_arr[i] = (i / 4) + 1; + } + + void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3); + fillTensorWithOnes(rhs); + + void* output = tensorGemmGPU(lhs, rhs); + printTensorValues(output); + + void* bias_ptr = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 3, 1, 1); + 
struct Tensor* bias = (struct Tensor*) bias_ptr; + fillTensorWithOnes(bias); + + float* bias_arr = (float*) bias->host_data; + for(int i = 0; i < bias->num_elems; i++){ + bias_arr[i] = i + 1; + } + + void* output2 = tensorAdd(output, bias); + printTensorValues(output2); + +} + + + +void testTensorGemmBias(){ + + // NOTE: 2nd dim of bias and d2*d3*d4 for the input tensor MUST match + printf("***** TensorGemmBias ***** \n\n"); + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2); + fillTensorWithOnes(input); + void* bias = create2DTensor(CUDNN_DATA_FLOAT, 1, 4); + fillTensorWithOnes(bias); + + void* output = tensorGemmBias(input, bias); + printTensorValues(output); +} + + + +void testTensorConv2(){ + + int conv_mode = 1; // CROSS_CORRELATION mode + int compute_precision = 0; // floating point precision + + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3); + fillWithOnesAndTwos(input); + void** splits = tensorSplit(input, 2, 1); + + void* conv2W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2); + fillTensorWithOnes(conv2W); + + void** conv2fils = tensorSplit(conv2W, 2, 0); + + void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 0, 0, + 1, 1, conv_mode, compute_precision); + printTensorDims(conv2a_out); + + void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 0, 0, + 1, 1, conv_mode, compute_precision); + printTensorDims(conv2b_out); + + void* conv2_outs[2]; + conv2_outs[0] = conv2a_out; + conv2_outs[1] = conv2b_out; + + void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); + printTensorDims(conv2_concat_out); + printTensorValues(conv2_concat_out); + +} + + + +void testTensorConv3(){ + + int conv_mode = 1; // CROSS_CORRELATION mode + int compute_precision = 0; // floating point precision + + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 96, 28, 28); + fillTensorWithOnes(input); + void** splits = tensorSplit(input, 2, 1); + + void* conv2W = 
readTrainedWeights("../alexnet/params/conv2.bin", + CUDNN_DATA_FLOAT, 256, 48, 5, 5); + + void** conv2fils = tensorSplit(conv2W, 2, 0); + + void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, + 1, 1, conv_mode, compute_precision); + printTensorDims(conv2a_out); + + void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, + 1, 1, conv_mode, compute_precision); + printTensorDims(conv2b_out); + + void* conv2_outs[2]; + conv2_outs[0] = conv2a_out; + conv2_outs[1] = conv2b_out; + + void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); + printTensorDims(conv2_concat_out); + //printTensorValues(conv2_concat_out); + dumpWeightsToFile("tensors_out/conv2_test.out", conv2_concat_out); + + void* conv2bias = readTrainedWeights("../alexnet/params/conv2.bias.bin", + CUDNN_DATA_FLOAT, 1, 256, 1, 1); + void* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); + printTensorDims(conv2bias_out); + + dumpWeightsToFile("tensors_out/conv2_bias_test.out", conv2bias_out); + +} + + +void testLRN(){ + + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 20, 20, 20, 20); + fillTensorWithOnes(input); + + unsigned LRN_window = 5; + double LRN_alpha = 2e-05; + printf("LRN_alpha = %f \n", LRN_alpha); + + double LRN_beta = 0.75; + double LRN_k = 1.0; + + // TEST-point - Compare TF vs CUDNN + void* lrn1out = tensorLRN(input, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn1out); + dumpWeightsToFile("tensors_out/lrn1_test.out", lrn1out); + + void* input2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 7, 7, 7, 7); + fillTensorWithOnes(input2); + + LRN_window = 5; + LRN_alpha = 0.5 * LRN_window; + + LRN_beta = 0.75; + LRN_k = 1.0; + + void* lrn2out = tensorLRN(input2, LRN_window, LRN_alpha, LRN_beta, LRN_k); + printTensorDims(lrn2out); + dumpWeightsToFile("tensors_out/lrn2_test.out", lrn2out); +} + + + + +void testTensorAdd(){ + + // Tensor add with equal dimensions + void* x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 
2, 2, 2); + void* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2); + fillTensorWithOnes(x); + fillTensorWithOnes(bias); + + printTensorValues(x); + printTensorValues(bias); + + tensorAdd(x, bias); + printTensorValues(x); + + // Tensor addd with matching channel dimension + void* x2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 2, 2); + void* bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 1, 1); + fillTensorWithOnes(x2); + fillTensorWithOnes(bias2); + + tensorAdd(x2, bias2); + printTensorValues(x2); +} + + +void testTensorError(){ + + // Tensor add with equal dimensions + void* x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 128); + fillTensorWithOnes(x); + + Tensor* x_tensor = (Tensor*) x; + float* data_arr = (float*) x_tensor->host_data; + for(int i = 0; i < x_tensor->num_elems; i++){ + data_arr[i] = 0.2; + } + + tensorAddError(x, 3); + printTensorValues(x); +} + + +void testTensorConv(){ + + // NOTE: The input channel count value (param2 to Tensor and Filter) must be the same + void* x3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 4); + // NOTE: Filter descriptors do NOT have batch size + // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) + void* filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, 1, 2, 2); + fillTensorWithOnes(x3); + fillTensorWithOnes(filter); + + int conv_mode = 1; // NOTE: uses CROSS_CORRELATION + int compute_precision = 0; // floating point precision for conv + + void* conv1 = tensorConvolution(x3, filter, 0, 0, + 1, 1, conv_mode, compute_precision); + printTensorValues(conv1); + + // NOTE: For cudnnTensorAdd, the only dimension that MUST match is channels + void* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1); + fillTensorWithOnes(bias3); + tensorAdd(conv1, bias3); + printTensorValues(conv1); +} + + +void testTensorPool(){ + void* x = 
create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 4, 4); + fillTensorWithOnes(x); + void* output = tensorPooling(x, 0, 2, 2, 0, 0, 1, 1); + printTensorValues(output); +} + + + +void testTensorRelu(){ + + // NOTE: 2nd dim of bias and d2*d3*d4 for the input tensor MUST match + printf("***** TensorRelu ***** \n\n"); + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2); + fillTensorWithNegOnes(input); + + void* output = tensorRelu(input); + printTensorValues(output); +} + + +void testTensorSoftmax(){ + + printf("***** TensorSoftmax ***** \n\n"); + void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 4, 1, 1); + + float* host_ptr = (float*) ((struct Tensor*) input)->host_data; + host_ptr[0] = 0.1; + host_ptr[1] = 0.2; + host_ptr[2] = 0.3; + host_ptr[3] = 0.4; + host_ptr[4] = 0.5; + host_ptr[5] = 0.6; + host_ptr[6] = 0.7; + host_ptr[7] = 2.5; + + void* output = tensorSoftmax(input); + printTensorValues(output); +} + + +void testSoftmaxOutput(void* output_ptr){ + + struct Tensor* output = (struct Tensor*) output_ptr; + + size_t batch_dim = output->dims.dim_sizes[0]; + size_t channels = output->dims.dim_sizes[1]; + + float* data = (float*) output->host_data; + for(int i = 0; i < batch_dim; i++){ + float sum = 0.0; + for(int j = 0; j < channels; j++){ + sum += data[i * channels + j]; + } + printf("output_sum = %f \n", sum); + } + +} + + + + +int main(){ + + llvm_hpvm_initTensorRt(0); + + startProfiling(); + + testTensorHgemm2(); + testTensorSgemm2(); + testTensorConv(); + testTensorError(); + + //testTensorGemm(); + //testTensorGemmGPU(); + //testTensorGemmBias(); + //testTensorConv2(); + //testTensorConv3(); + //testLRN(); + + stopProfiling(); + + return 0; +} diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc new file mode 100644 index 0000000000000000000000000000000000000000..7ab357e4ac6e8b6550a71b14ce73c79e20879cf3 
--- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc @@ -0,0 +1,108 @@ + + +#include <stdio.h> +#include <cstdlib> +#include <stdlib.h> +#include <unistd.h> + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" +#include "../include/types.h" +#include "../include/op_overheads.h" + + + + +/* NOTE: Reference Architecture to use for profiling */ +void testFCNetworkArch(int op1_acc, int op2_acc, int op3_acc, + int op4_acc, int op5_acc, int op6_acc, int op7_acc){ + + printf("********* Fully Connected DNN-1 - Accuracy Tuned ********* \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 10000; + void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", + float_type, test_batch_size, 1, 28, 28); + + void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", + float_type, 1, 1, 784, 128); + void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", + float_type, 1, 128, 1, 1); + void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", + float_type, 1, 1, 128, 10); + void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", + float_type, 1, 10, 1, 1); + + // Start execution profiling Tensor ops + startProfiling(); + + // Layer-1 + void* fc1out = tensorGemmGPU(input, fc1_weights); + void* error_norms = tensorAddError(fc1out, op1_acc); + add_norms(error_norms); + add_gemm_overheads(input, fc1_weights, op1_acc); + + void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); + error_norms = tensorAddError(fc1_bias_out, op2_acc); + add_norms(error_norms); + add_bias_overheads(fc1_bias_out, op2_acc); + + void* fc1_relu = tensorRelu(fc1_bias_out); + error_norms = tensorAddError(fc1_relu, op3_acc); + add_norms(error_norms); + add_relu_overheads(fc1_relu, op3_acc); + + // Layer-2 + void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); + error_norms = 
tensorAddError(fc2out, op4_acc); + add_norms(error_norms); + add_gemm_overheads(fc1_relu, fc2_weights, op4_acc); + + void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); + error_norms = tensorAddError(fc2_bias_out, op5_acc); + add_norms(error_norms); + add_bias_overheads(fc2_bias_out, op5_acc); + + void* fc2_relu = tensorRelu(fc2_bias_out); + error_norms = tensorAddError(fc2_relu, op6_acc); + add_norms(error_norms); + add_relu_overheads(fc2_relu, op6_acc); + + void* result = tensorSoftmax(fc2_relu); + error_norms = tensorAddError(result, op7_acc); + add_norms(error_norms); + add_bias_overheads(result, op7_acc); + + stopProfiling(); + + computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); + + dump_result("accuracy_summary"); +} + + + +int main(int argc, char* argv[]){ + + if(argc < 8){ + printf("Must provide 7 knobs for accuracy tuning \n"); + abort(); + } + + // This initializes the runtime - must be called before anything + llvm_hpvm_initTensorRt(0); + + int op1_acc = atoi(argv[1]); + int op2_acc = atoi(argv[2]); + int op3_acc = atoi(argv[3]); + int op4_acc = atoi(argv[4]); + int op5_acc = atoi(argv[5]); + int op6_acc = atoi(argv[6]); + int op7_acc = atoi(argv[7]); + + testFCNetworkArch(op1_acc, op2_acc, op3_acc, + op4_acc, op5_acc, op6_acc, op7_acc); + + return 0; +} + diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..17b5b1e6bfbccd08a42fdf7ee241a7742e764ffb Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..5187944d335d316a3d5a4015d7da69e425878347 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe15f7c890cdc9c6e1afd83dc50b8c1308a55dcc Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c7278911331c715598268586202b1d95aa5ef58 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin new file mode 100644 index 0000000000000000000000000000000000000000..a11cdfff7f2e6f1f70fc37e8c0da9b3997116f27 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..493f78d98eec57da2bb3004079a64f0584ea60d9 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f940102eb8a17051c44e8fe12a6b7730a61c15d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin new file 
mode 100644 index 0000000000000000000000000000000000000000..fd3305efb194f19475cb0a260f845efc8bd986e7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin @@ -0,0 +1 @@ +Ǜ±¿žÓ?Ü‚?Sˆ¾6;bZö>c÷”¿¡™Ê<`–i? \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..d24151936ec9c89e260439d55edf42d2dc55723f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..9cca044999ee589bbfdfada84db040751559cd26 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fc42c720505d595c80dc426cc739dcdc5e5c7e2 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb2e7856b366465f0574ad68d71c88432d021b27 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin @@ -0,0 +1 @@ +°F¾Êgw>I‹q<?N½»ƒ¾Ò…¹> „m½É > ݆¾ÝH/½ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin new file mode 100644 index 
0000000000000000000000000000000000000000..779dcf7f6ad72f3e22d5c96148d2f0f7e11e39b8 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a2acb0bdfd5b2073eaad1ec51b99faf5a60ae07 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b20a053997e30f768995fbf4f27ab6b04f8403f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd955fc10e8727ed4bf41100f7b74d2026d0cad6 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..e225b78da4cf02f9500020edbd2a6f68a03ab7aa Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3b5c08ee1330f8f33d24f12aad8c78437a54a28 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin differ diff --git 
a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3a7f0b8f93603a474ac3ef23a48b550de1e2327 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin new file mode 100644 index 0000000000000000000000000000000000000000..f92c14bde53bd000f8d3f7993373fe6797dd4921 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..6459c295b4dc106194c17ce55c363caded6b8bc8 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc4e57ac0a859851103667d6eb8cc835b70e04de Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3acb43f03590d809b55df5a4cc264a1d4f8318ba Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin new file mode 100644 index 
0000000000000000000000000000000000000000..ccac40a8f33803d941fa0041c8568ea589fdd945 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aff63b21593ec7ebd0c04f41f151bad113cc2e4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin @@ -0,0 +1 @@ +ã¿ê¥>¿½×J? ñ"¿Rùû¾8‰Š¼=ìß>qO¿ÂB? \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..be2731c02774a6ed6c554121cb507ca753b87144 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea4933ab67c82010bde969df1028adb828c0a44d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..d69299cae0826fc57a32a3bc389bcf25603d9bc6 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..1001ecf3ac16b388f4619d12bfeab4ca7db3e726 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd97ce049b50c71430f4db63219931e168e05515 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..460b7fe40bd60902338b99aed2446ed746c1c8ea --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin @@ -0,0 +1 @@ +8I¿˜jc?—ì>¢_ѽDÒ=b"¿Hý½ÊMõ> xo¿3*? \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..56ec40072906c21991c9bee2985651f6fadeaba0 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..421dd8c3782c2acfe019530565c2855cdccd3bf0 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf18f1c5d631ce9734cfc067b503c7d9484fd1ba Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin 
b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fce451559ed38bfa5d642f6df6d13e270a49c4c Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin new file mode 100644 index 0000000000000000000000000000000000000000..57938a2e2db17982f9646ea57cb2551c41f1ce8f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5ae2b4abd77063bd80cda4a9321c62d5a42070b Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ed4b5a50917b127351dc7a673a8c87ac8ddedd6 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..0633a1a2bf444ecbe6e0b5896c92b72a5fc7ecdb --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin @@ -0,0 +1 @@ +Éìù¾°ŸÐ>-¾¤‹ó¾}*&¿ñf¿C/˜¾i¤à½v”=?®éJ¾ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin new file mode 100644 index 
0000000000000000000000000000000000000000..670261107ead208daa54e1471a1818b098315ee7 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4469ef562dae74e197fa44fb8ac762ac5c4a288 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin @@ -0,0 +1,3 @@ +òÙ¼W½Bì;Çz$ k'a=a¸î½`Ôƒ¸v£Žµ¤ÿ¾† W¾ƒLl½ÇÄž=?š¿Zîß=<ò!¿x”"¿Ž!”‹Á +8½à’¼u¶¾d쿃]¼Ffr¾ZÅ×½#¶½^žº±Ø½j5¼€) +¿æc4¿lÿå¾uú½ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e03b76e819f700dc0e5a23919e1ec277ec4774b Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a8053c9575fcff5134dbb921accaf37fd294e46 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin @@ -0,0 +1,2 @@ +œ§Y¾¨ü¸¾ Ѽ£2߸Z—‹Ê÷˜,V5=]±¾¿ë„¾¨çE§ +¨¹²Z`¾MÑϾȼߖ‹U”‚¾"Rƒ<2À¾4Áª»íž)=L¹E¬Ç …½[ÚA¾‰ ¸½!âs½]1¼›²#¾o"Ÿ†u=]Öéº-ìý½”*»A9ú½¼ö½¾d6Š<ˆ1æ‹Ìf¾c«¬<>R“‹M޾K®½Mÿù½j¾-½O™…¾Ì8!½ßò"¾Dqú¹Œ|¢¾Cæ.½öýZ¾»8\¾p½‹Ó-ν¯*÷½}'r½ú'“»e›”‹5î¼)/½>¤>bÓÏ«Öaô¤¶'¾ØÞ¿¾ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e27b6e9147a65e1db3477bfda0a0a1274783215 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..a28db7e3aeb6e2892d31122603a83667995da874 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..01774f984fdf39eaeba5023caa01fe203046667f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..d59c5dee69dffdc9ca49a0707244595074e1a471 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin @@ -0,0 +1,2 @@ +h.6=)¿†|’¾ïë +¢_µ'>;‚n¼¤Oò¼6T®=t#M¾D{°¾ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..202d11adb809f2035db1d4d09806661b8bd978cb Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..11ac18222cac00e38c809f5f132824e1000ccb50 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin differ diff --git 
a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..bea1dfe0079e8fb80730620cad4859f5e2baaa9a Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..f537eb0cd2ec3847bcb90fab8bb5025157097b1e Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..c43543bb447c31e60545358929f4df460a1b0d9f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..01bd440f4f16036b2ceafbbb5629ee02082ed82d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b51cb239a1423432059bd84feee57f70068d1fe Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin new file mode 100644 index 
0000000000000000000000000000000000000000..76535beffd242bfe579ea55cf82e80c60d871c96 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin @@ -0,0 +1 @@ +°Ï=ô ?«~u¿£q½;ƒÀª=G›‚?3$¿"µq¿nTJ?'jE> \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f67654d3ad5fd01f92efab6b7977ba43bdd523d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ef4abd162cf515ef3df6f9f10e0d281165d39ec --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin @@ -0,0 +1,3 @@ +óDž=ÀϾ ×ô»XC=Pñj>§.j=B(s»0óº¾ñ$,¼úr޾ Ëļ\øÊ=TYh»Æ¦¸¾¦Cï=+ý¾½ +źjϽ.Ud¾d*>Q¹Ò¾âÝU¾cqC<óFÓ=° +Í=k>ɑνXõ¾˜Ç¾Å›S¾Ô<¾ps¼ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc46877a1a81154f8c78f559c457a0f691289a48 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..42ee244812fd9c46f7208fd8396bf4dbf5df6197 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin @@ -0,0 +1,2 @@ +º†¾O=×Ä—>®¯¾š¡¾çÕ½i;H= R—¾'†(>†Ÿ<#>wc\¾ 
2˾<<í¼¨þ¾Á‡–=¢;ïºÃ†›½÷¼È<øx„>¸/H¾Šw<¬”_¾™¦Ê»øJF>4ñ<ÙµD>È8(=ºà=‹°°¾©ùP¾8½Ñ/ɽɖƒ¾¯åð<ñ€‚½¸¼½=]¾ƒl¾´ÊL½Ù§…¾ªûÕ<ÊT˾¬>>_ꌾÁ¾¾Ôa>tC>ÉÛ=‘ü”½Í¾)5D¾Þ‹X½Ô&>éP‹<Þöi¾‚[j¾1_ýØO¾/¢=aÙ +¾1Z\=ãÙŽ« \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9dd2a8c001e075b52feffcd2538107a08c74932 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..77d4c31025156e35069533ef65623a6b019962df Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..92c9f125a58d324947f20b3d41754fd491c2ac63 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4cb460055bfd675b1c576c7224d00b362d1aa7f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin @@ -0,0 +1,2 @@ +ì@÷=nÑ>óñ#>:–¡>)(Ó½Í-¤>$Úh>5¹½¥0„¾ +꿾 \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f67654d3ad5fd01f92efab6b7977ba43bdd523d Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ef4abd162cf515ef3df6f9f10e0d281165d39ec --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin @@ -0,0 +1,3 @@ +óDž=ÀϾ ×ô»XC=Pñj>§.j=B(s»0óº¾ñ$,¼úr޾ Ëļ\øÊ=TYh»Æ¦¸¾¦Cï=+ý¾½ +źjϽ.Ud¾d*>Q¹Ò¾âÝU¾cqC<óFÓ=° +Í=k>ɑνXõ¾˜Ç¾Å›S¾Ô<¾ps¼ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc46877a1a81154f8c78f559c457a0f691289a48 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..42ee244812fd9c46f7208fd8396bf4dbf5df6197 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin @@ -0,0 +1,2 @@ +º†¾O=×Ä—>®¯¾š¡¾çÕ½i;H= R—¾'†(>†Ÿ<#>wc\¾ 2˾<<í¼¨þ¾Á‡–=¢;ïºÃ†›½÷¼È<øx„>¸/H¾Šw<¬”_¾™¦Ê»øJF>4ñ<ÙµD>È8(=ºà=‹°°¾©ùP¾8½Ñ/ɽɖƒ¾¯åð<ñ€‚½¸¼½=]¾ƒl¾´ÊL½Ù§…¾ªûÕ<ÊT˾¬>>_ꌾÁ¾¾Ôa>tC>ÉÛ=‘ü”½Í¾)5D¾Þ‹X½Ô&>éP‹<Þöi¾‚[j¾1_ýØO¾/¢=aÙ +¾1Z\=ãÙŽ« \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9dd2a8c001e075b52feffcd2538107a08c74932 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..77d4c31025156e35069533ef65623a6b019962df Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..92c9f125a58d324947f20b3d41754fd491c2ac63 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4cb460055bfd675b1c576c7224d00b362d1aa7f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin @@ -0,0 +1,2 @@ +ì@÷=nÑ>óñ#>:–¡>)(Ó½Í-¤>$Úh>5¹½¥0„¾ +꿾 \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..321259debd6ff4d1445edcadd69cc6de53f3f03d Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..484b86517f5496d0f97f7d5af2c0ad04b869be6e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin @@ -0,0 +1,2 @@ +;n^?(5à¾Ärp¾øgB¿±"ì¾Vø9¿û»=Z¾>EE¿Úýš>"[€½°0r¾v!Ä>³ð¢½M°!¿±TY¿yÕ_¾š-¾"~G? 
5²¾Ë~ý¾yk&¿s2K¾#¢?e +?«E¿w€=)X=4Z¾ ,ö>/b¦>„+> \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..612cedbfad68ad210388bb7c1a9825e97e673872 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..2994d417de60c4a04c671978c6cbff4c9457d9ef Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..78f283aa48f4fb58fe3bc07a5320836107269596 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbd06906d6c5a3bf16e45c01d8ccbc2338b20bc8 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..637b3538402d599668af1a90e405345cab4c45b7 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..fab36f6c21745e478934d98b64ca1220d0c9fc0e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin @@ -0,0 +1 @@ +É#¨:»Od:É;[€<F ×¹ÀÒC·tZ¸x[Ÿ9ƒ…Û<Übˆº \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..89ab6ad37cac94360f7f87c93676f353829f1deb Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a2a381337e13fe52959c838b4a2bedab3c3f8ab --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin @@ -0,0 +1 @@ +h4Q;¤ù;34¼j0_½G½–h;ìz/½ðÇÊ:àk¥¼{l½t+O;u¼8™¨¼d»”½®¼}8›<íO’¼äÕ¿»¤#½„ö¼”u<¼¿l…¼f¢;Ð4½ŠO ½>Øž¼7K¼04½ÎG:à'½ÔOF½M=; \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cd00b88c5be6e212f2d3a37c8ea2a8edb1ceca7 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..c0adf3e885ce855a0cc9d1b4b12f73665187159e Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin differ diff --git 
a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..152c5bb0baae480f6b8d317889fc68f8d77247b6 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..58221f45cdc56049b2edc29c244ea9d797a87fb5 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..97d78a9610b15be285661c1d762026c9fa4100cb Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbda59beef150dfbca756621286f042ec8e247bf --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin @@ -0,0 +1 @@ +Ê%”½ùb½Ó„g½W•½$VĽéum½'Ƶ½J§’½·¾¶½›¢½ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..7536ef9f25e8fe7c7d47dac2857fe1cb291464d6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin @@ -0,0 +1,2 @@ +åMS¾¶õÊ=9Øø=…¾kO§>¬¯¾8g¾šÍ®>€Jn=ܑѽ¥> +”>ô+L>ä„—>a¹¥>Jº“¾B*3>‹èM>Û`>kßÅ< \ No newline at end of file diff --git 
a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..48dabc33ff1ffc605aba73b34f884c2e43f23910 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..103ae6938d19b43b462c352d4c4d23c0bef7caaf Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff4242bb8002c7e81e5655bfa197541da6a9921f Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte new file mode 100644 index 0000000000000000000000000000000000000000..1170b2cae98de7a524b163fcc379ac8f00925b12 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte new file mode 100644 index 0000000000000000000000000000000000000000..d1c3a970612bbd2df47a3c0697f82bd394abc450 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9f5de5084ae4506e610ebe7deba62de40f3e536 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin new file mode 100644 index 0000000000000000000000000000000000000000..232032080ffe11e84977e84ebfde02c728ba2718 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..67e323754adf73f147a1776916d6f48b7fdd7782 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin @@ -0,0 +1 @@ +Ž½ÒŠ;=-‰¼Çjz¼éœºYÕ¼—ˆ<X’ƒ¹ Mú¼œò•¼ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddde5fb3258d7abea7ece3fc0455e7532e4a30ee Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin new file mode 100644 index 0000000000000000000000000000000000000000..779dcf7f6ad72f3e22d5c96148d2f0f7e11e39b8 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin new file mode 100644 index 
0000000000000000000000000000000000000000..c841ed3b821617f81fc8764830868e64713668db Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4566564e1ad5dc9a0231575ca33f752b53c24a7 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..2304c792451e65d7a6f4615060dfc0c90164dc29 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..08c01b1586c269269d8dc8951afb7cd0c02606b2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin @@ -0,0 +1,2 @@ +è¾#¬¿ÀIº¾¹¾ë ɾ¨9D¾èÍž¾£½fn»>_Ái>ú»«¾PØ> +þv¾K,,¾az.¾&Ѿc…:¾EÂ¥>\Œ«>œöú½á†]½ÿ%]¾ö¸P>íi<>Ðû\¾¹ã¾åÀ)¾d€™>oÞ½«‡%>ŽÎ9½zNâ½È:>Ù˜\¾¦ºj>vP>ÿ›4>‚¾ÖDà¾õ§Ð¾¶õ¬>qS¾Œ'á¾:é;zb$>Àƒu>í9w>¦˜n½+Ò¾»‘…¾“ÐS½ª >јº½b¾#)¸¾’%e>=ä#<Íà¾Ã¾E‰2¾]«Y=r¦³½)*k¾ ,¿ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f59c44723443b40667340a60ae20311133c425a Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..24656c9753f9ab1b6d8b648f2fe7f3d6af24bebd Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b567de77acfc62b54ec4a676df8256b07a6b127 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7c0eae24cc844613f616fda43cd444c5f506ebf --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin @@ -0,0 +1 @@ +µs¾Ó˜>P5>ù>bëÈ=_Ïú½ƒA‚>Äô}>\¾+Nè< \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..c841ed3b821617f81fc8764830868e64713668db Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4566564e1ad5dc9a0231575ca33f752b53c24a7 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin new file 
mode 100644 index 0000000000000000000000000000000000000000..2304c792451e65d7a6f4615060dfc0c90164dc29 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..08c01b1586c269269d8dc8951afb7cd0c02606b2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin @@ -0,0 +1,2 @@ +è¾#¬¿ÀIº¾¹¾ë ɾ¨9D¾èÍž¾£½fn»>_Ái>ú»«¾PØ> +þv¾K,,¾az.¾&Ѿc…:¾EÂ¥>\Œ«>œöú½á†]½ÿ%]¾ö¸P>íi<>Ðû\¾¹ã¾åÀ)¾d€™>oÞ½«‡%>ŽÎ9½zNâ½È:>Ù˜\¾¦ºj>vP>ÿ›4>‚¾ÖDà¾õ§Ð¾¶õ¬>qS¾Œ'á¾:é;zb$>Àƒu>í9w>¦˜n½+Ò¾»‘…¾“ÐS½ª >јº½b¾#)¸¾’%e>=ä#<Íà¾Ã¾E‰2¾]«Y=r¦³½)*k¾ ,¿ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f59c44723443b40667340a60ae20311133c425a Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..24656c9753f9ab1b6d8b648f2fe7f3d6af24bebd Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b567de77acfc62b54ec4a676df8256b07a6b127 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7c0eae24cc844613f616fda43cd444c5f506ebf --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin @@ -0,0 +1 @@ +µs¾Ó˜>P5>ù>bëÈ=_Ïú½ƒA‚>Äô}>\¾+Nè< \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c6404768d1262101afc967cd3b660a7e757cd25 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..8335621803cf622c0724fd437623d9277efb458c Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..09c1e72f56a144675d48e5d4969e260100c35ada Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d4d6e388f22cf825a1e8b434fd34080fc8912e8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin @@ -0,0 +1,3 @@ +†¹œ½~Ä 
+½Î"y½ô£½È½ès½eªf½tN¼ ¬½¨X½¨„?¼›þŸ¼ž¯½OÙ3½%§¼ß³H½†À +½&½JO0½¶wž½¥:o½ðе¼ý[Z½P½S‚³½ˆé?½Ä *½Š^[»`ì(½Æœ½þã½ÛV®¼¨ü˜½çtƽû¯¸º|Üv½®‰½ó®Ã¼@»¼ïÖŒ½ Hš¼ó‰»µh‹½ux½-N,½«Ú©½c†N»è¡Ž½µ¤@½ÈzÕ¼4™½.·!½ÐÊÀ¼ú®½c¬Ûº>.½xཽá¼Oû½ý+;¼¿œ•½i‹š½éµ–¼ØGP½ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..36e6a84bfba394921e4ca50c2acaba1482ea0ae1 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..56442e581a16e7f76a46866274c0ea66ea8be086 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ad7ae497969781128a6f98fd923655934fd217a Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..4cb6d824d540d66502b5e7ab0157e567a2d1a300 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin @@ -0,0 +1 @@ +©|½8Ý;ÿÄ‚½!´½!ζ½ß[“½×ý–½$Ðm½0Œ¦½›V´½ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca013bfaafd114694b2f83ecfc2d177fdb38990e 
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbb78f121d9c2a1ae219a45cc20539a990648186 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin @@ -0,0 +1 @@ +éÃ?”è¿åSª?*?‡%?’òJ¿a‰€¿CM]¾ ˆÎ>Íž¿Pú€?µ¿ûd»¾® y¿>‰?"™Õ?<ª¾|«ˆ¿Vr³¿~†?_!/?]Ú@B:L¿Ý¼Í>‡Å·¾<J„¾;–©?ek>rËß¾ "N?cPs¿st]¿ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin new file mode 100644 index 0000000000000000000000000000000000000000..621b5259648bb00ade00273d73ed30f2dc0af52c Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..61ffbbc2ae7d92dd220e09ca1418898bf63f973e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin @@ -0,0 +1,2 @@ +s+á¾ F§=´ïH¿J›@*G¿Uý±¿-ôA>»¢¿tƒô½q¾Ö¯ƒ>ÒáÙ½lÒ?é…&½æ¾cq}¿ö¼½‹@c÷o=òp?p–‡?ט?&j<ãR??&‚Ó¿w¥¿A…í¿%î>DXÁ¿ŽQ;@½¡£?µ1”?äßÀWp¿{è…¾Ù̦¾-G¿"©ˆ¿vË¥¾8Fd¿ãÔó½‚%?ZnÕ¾7ÍM¿a·K¿ó“<9/?]/P?|‚H?y5˜¿.A?bï¿-øˆ¿A.Ù?<t¨½&òÀÅ3>óN’¿sÝ +ÀF+è?‰;?,b«?©ýο‡ºæ> \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8a16853b5dcf00cfaac58438ee28c8c9273b077 Binary files /dev/null and 
b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..3346319c7e19432d16e3eb471b53216fa6efb162 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin new file mode 100644 index 0000000000000000000000000000000000000000..19286ba29008b48b34409b963e92e8817d35e6e3 Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin differ diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin new file mode 100644 index 0000000000000000000000000000000000000000..94a9ea8487c42b6b27d411ab678f64085b12fef4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin @@ -0,0 +1 @@ +M‡“?#=e?N‹!¿‰ñs¿¸OÕ¿„@\?2±“¿j3?sÆ¢½º—o¿ \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h# b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h# new file mode 100644 index 0000000000000000000000000000000000000000..d474152fd80ecc90e07092795e513e2d97da0129 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h# @@ -0,0 +1,627 @@ + +#ifndef ERROR_HEADER +#define ERROR_HEADER + + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <ctime> +#include <cfloat> +#include <algorithm> +#include <sstream> +#include <vector> +#include <iostream> +#include <random> +#include <string> +#include <time.h> + +#include <curand.h> +#include <curand_kernel.h> 
+#include <math.h> +#include <assert.h> + + +#include "../include/debug.h" +#include "tensor.h" +#include "profiling.h" +#include "tensor_utils.cu" +#include "global_data.h" + + + + +void readOpenTunerFlags(char* file_name){ + + total_ops = 0; + op_counter = 0; + op_accuracies.clear(); + + + FILE* fp = fopen(file_name, "r"); + if(fp == NULL){ + ERROR("File 'opentuner_flags' not found \n"); + } + + int retVal = 200; + while(retVal != EOF){ + + int op_acc; + if(fp != NULL) + retVal = fscanf(fp, "%d", &op_acc); + else + op_acc = 0; + + op_accuracies.push_back(op_acc); + //printf("op_accuracies = %d, total_ops =%d \n", op_accuracies[total_ops], total_ops); + total_ops++; + } + + fclose(fp); +} + + + + + +/*__device__ inline void atomicAdd(float* address, float value) + +{ + + float old = value; + float new_old; + + do{ + new_old = atomicExch(address, 0.0f); + new_old += old; + } + + while ((old = atomicExch(address, new_old))!=0.0f); + +}; +*/ + + + + + +Norm_t* calculateNorms(Tensor* x, Tensor* x_orig){ + + deviceToHostCopy(x); + deviceToHostCopy(x_orig); + + // NOTE: Move floats to doubles - overflow is quite possible + float l1_norm = 0.0; + float l2_norm = 0.0; + float inf_norm = -1.0; + double total = 0.0; + + float* arr1 = (float*) x->host_data; + float* arr2 = (float*) x_orig->host_data; + + for(unsigned int i = 0; i < x->num_elems; i++){ + + total = total + arr2[i]; + + float diff = abs(arr1[i] - arr2[i]); + l1_norm += diff; + l2_norm += (arr1[i] - arr2[i]) * (arr1[i] - arr2[i]); + + if(inf_norm < diff) + inf_norm = diff; + } + + l1_norm = l1_norm / (x->num_elems * 1.0); + l2_norm = l2_norm / (x->num_elems * 1.0); + + double distribution_mean = total / (x->num_elems * 1.0); + l1_norm = l1_norm / distribution_mean; + l2_norm = l2_norm / distribution_mean; + + + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + norms->l1_norm = l1_norm; + norms->l2_norm = l2_norm; + norms->inf_norm = inf_norm; + + INFO("l1_norm = %f \n", l1_norm); + INFO("l2_norm = %f \n", 
l2_norm); + INFO("inf_norm = %f \n", inf_norm); + + return norms; +} + + + +Norm_t* calculateNorms2(Tensor* x, Tensor* x_orig){ + + deviceToHostCopy(x); + deviceToHostCopy(x_orig); + + // NOTE: Move all floats to doubles - overflow is quite possible + double l0_norm_A = 0.0; + double l0_norm_B = 0.0; + + double l1_norm_A = 0.0; + double l1_norm_B = 0.0; + + double l2_norm_A = 0.0; + double l2_norm_B = 0.0; + float inf_norm = -1.0; + float orig_inf_norm = -1.0; + double total_diff = 0.0; + double total_diff_squared = 0.0; + + float* arr1 = (float*) x->host_data; + float* arr2 = (float*) x_orig->host_data; + + for(unsigned int i = 0; i < x->num_elems; i++){ + + if(arr2[i] != 0.0) + l0_norm_A = l0_norm_A + 1.0; + if(arr1[i] != 0.0) + l0_norm_B = l0_norm_B + 1.0; + + l1_norm_A = l1_norm_A + abs(arr2[i]); + l1_norm_B = l1_norm_B + abs(arr1[i]); + + l2_norm_A = l2_norm_A + (arr2[i] * arr2[i]); + l2_norm_B = l2_norm_B + (arr1[i] * arr1[i]); + + float diff = abs(arr1[i] - arr2[i]); + total_diff = total_diff + diff; + float diff_squared = diff * diff; + total_diff_squared = total_diff_squared + diff_squared; + + + if(orig_inf_norm < diff){ + orig_inf_norm = diff; + } + + // Relative difference value + float normalized_diff = diff / arr2[i]; + if(inf_norm < normalized_diff){ + inf_norm = normalized_diff; + } + } + + // Relative L1 and Mean L1 norms of the difference Matrix + float mean_l1 = ( total_diff ) / x->num_elems; + float relative_l1 = ( total_diff ) / l1_norm_A; + + // Computing Relative L2 norm - i.e., Euclidean distance + double norm_root_A = sqrt(l2_norm_A); + double diff_root = sqrt(total_diff_squared); + float mean_l2 = diff_root / x->num_elems; + float relative_l2 = diff_root / norm_root_A; + + // Packing computed norms in Norm_t struct + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware + norms->mean_l1 = mean_l1; + norms->mean_l2 = mean_l2; + norms->orig_inf_norm 
= orig_inf_norm; + + // Relative metrics (relative to distribution) - suitable for PROMISE + norms->l1_norm = relative_l1; + norms->l2_norm = relative_l2; + norms->inf_norm = inf_norm; + + INFO("l1_norm = %f \n", relative_l1); + INFO("l2_norm = %f \n", relative_l2); + INFO("inf_norm = %f \n", inf_norm); + + return norms; +} + + + + + +__global__ void normComputeKernel(float* A, float * B, double* l1_A, double* l2_A, + double* l1_diff, double* l2_diff, unsigned int n){ + + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if(i < n){ + + double diff = fabsf(A[i] - B[i]); + double diff_squared = diff * diff; + + atomicAdd( l1_A, fabsf(A[i]) ); + atomicAdd( l2_A, (A[i] * A[i]) ); + + atomicAdd( l1_diff, diff); + atomicAdd( l2_diff, diff_squared); + } +} + + + +// Compute Norms on the GPU +Norm_t* calculateNormsGPU(Tensor* x, Tensor* x_orig){ + + hostToDeviceCopy(x); + hostToDeviceCopy(x_orig); + + // FIXIT: Move all floats to doubles - overflow is possible + + double l1_norm_A; + double l2_norm_A; + + double l1_diff; + double l2_diff; + + // Device pointers + double *l1_norm_A_d; + double *l2_norm_A_d; + double *l1_diff_d; + double *l2_diff_d; + + cudaMalloc( (void**) &l1_norm_A_d, sizeof(double)); + cudaMalloc( (void**) &l2_norm_A_d, sizeof(double)); + cudaMalloc( (void**) &l1_diff_d, sizeof(double)); + cudaMalloc( (void**) &l2_diff_d, sizeof(double)); + + + float* arr1 = (float*) x->gpu_data; + float* arr2 = (float*) x_orig->gpu_data; + + int blockSize = 1024; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + normComputeKernel<<<gridSize, blockSize>>>(arr1, arr2, l1_norm_A_d, l2_norm_A_d, l1_diff_d, l2_diff_d, x->num_elems); + + cudaMemcpy(&l1_norm_A, l1_norm_A_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l2_norm_A, l2_norm_A_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l1_diff, l1_diff_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l2_diff, 
l2_diff_d, sizeof(double), cudaMemcpyDeviceToHost); + + + // Relative L1 and Mean L1 norms of the difference Matrix + float mean_l1 = l1_diff / x->num_elems; + float relative_l1 = l1_diff / l1_norm_A; + + // Computing Relative L2 norm - i.e., Euclidean distance + double norm_root_A = sqrt(l2_norm_A); + double diff_root = sqrt(l2_diff); + float mean_l2 = diff_root / x->num_elems; + float relative_l2 = diff_root / norm_root_A; + + // Packing computed norms in Norm_t struct + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware + norms->mean_l1 = mean_l1; + norms->mean_l2 = mean_l2; + norms->orig_inf_norm = 0.0; + + // Relative metrics (relative to distribution) - suitable for PROMISE + norms->l1_norm = relative_l1; + norms->l2_norm = relative_l2; + norms->inf_norm = 0.0; + + INFO("l1_norm = %f \n", relative_l1); + INFO("l2_norm = %f \n", relative_l2); + + return norms; +} + + + + +__global__ void vecConstMul(float* A, float mul_factor, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = A[id] * mul_factor; +} + + +__global__ void vecRound(float* A, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = roundf(A[id]); +} + + +__global__ void vecConstDiv(float* A, float div_factor, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = A[id] / div_factor; +} + + + +__global__ void vecMul(float* A, float* B, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + B[id] = A[id] * B[id]; +} + + +/**** ERROR injecion routines ******/ + +void initRandValues(Tensor* bias, int error_scale){ + + float scaling_values[20]; + + // FIXIT: Error knob 0 should be 0 zero + scaling_values[0] = 0.016; + scaling_values[1] = 0.018; + scaling_values[2] = 0.022; + scaling_values[3] = 0.026; + scaling_values[4] = 0.030; + scaling_values[5] = 0.035; + scaling_values[6] = 0.04; + 
scaling_values[7] = 0.06; + scaling_values[8] = 0.08; + scaling_values[9] = 0.1; + //scaling_values[8] = 0.15; + //scaling_values[9] = 0.2; + scaling_values[10] = 0.25; + scaling_values[11] = 0.3; + scaling_values[12] = 0.35; + scaling_values[13] = 0.4; + scaling_values[14] = 0.45; + // Values below are currently unused by Opentuner + scaling_values[15] = 0.5; + scaling_values[16] = 0.55; + scaling_values[17] = 0.6; + scaling_values[18] = 0.65; + scaling_values[19] = 0.7; + + curandGenerator_t gen; + + struct timespec ts; + if(timespec_get(&ts, TIME_UTC) == 0){ + printf("crashed \n"); + abort(); + } + + curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); + + curandSetPseudoRandomGeneratorSeed(gen, ts.tv_nsec^ts.tv_sec); + + curandGenerateNormal(gen, (float*) bias->gpu_data, bias->num_elems, 0.0, 1.0 * scaling_values[error_scale]); + + + /* + std::random_device rd; + std::mt19937 mt(rd()); + std::normal_distribution<float> distribution(0.0, 1.0); + + float* data_arr = (float*) bias->host_data; + for(int i = 0; i < bias->num_elems; i++){ + float rand_num = distribution(mt); + data_arr[i] = scaling_values[error_scale] * rand_num; + } + */ + +} + + + +void* addBitError(void* x_ptr, int error_scale){ + + if(error_scale > 6 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorBitError \n"); + profileEvent("tensorBitError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + // Quadratic Error + float freq_factors[6]; + freq_factors[0] = 0.1; + freq_factors[1] = 0.2; + freq_factors[2] = 0.4; + freq_factors[3] = 0.6; + freq_factors[4] = 0.8; + freq_factors[5] = 1.0; + + float error_freq = freq_factors[error_scale]; + + deviceToHostCopy(x); + + unsigned char* data_arr = 
reinterpret_cast<unsigned char*>(x->host_data); + // FIXIT: Need to be careful about floating point datatype assumptions + int size_of_elem = 4; + + long int total_bytes = x->size_in_bytes; + long int error_iterations = total_bytes * 0.01 * error_freq; + INFO("total_bytes = %lu, error_iterations = %lu \n", total_bytes, error_iterations); + + srand(time(NULL)); + + for(int i = 0; i < error_iterations; i++){ + // FIXIT: The rand() is only specific to int - need long + long int index = rand() % total_bytes; + int N = 5; // The operation below flips the Nth bit + unsigned char fil = 1UL << N; + unsigned char val = data_arr[index]; + char flipped = val^fil; + data_arr[i] = flipped; + } + + + Norm_t* norms = calculateNorms2(x, x_original); + + profileEvent("tensorBitError_end", true); + + return (void*) norms; +} + + +void randomCeilAndFloor(float* x, size_t num_elems){ + + INFO("randomCeilAndFloor\n"); + + std::random_device rd; + std::mt19937 mt(rd()); + std::normal_distribution<float> distribution(0.0, 1.0); + + for(size_t i = 0; i < num_elems; i++){ + float rand_num = distribution(mt); + int val = abs(((int) rand_num) % 2); + if(val == 0) + x[i] = floor(x[i]); + else if(val == 1) + x[i] = ceil(x[i]); + } + +} + +// Routine for Adding RoundOff Errors +void* addRoundError(void* x_ptr, int error_scale){ + + if(error_scale > 11 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorRoundError \n"); + profileEvent("tensorRoundError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + float round_factors[12]; + round_factors[0] = 1000000; // FIXIT: This should be zero error + round_factors[1] = 100; + round_factors[2] = 10; + round_factors[3] = 7; // Beyond this point, the error function 
is linear + round_factors[4] = 3; + round_factors[5] = 1; + round_factors[6] = 0.7; + round_factors[7] = 0.3; + round_factors[8] = 0.1; + round_factors[9] = 0.07; + round_factors[10] = 0.03; + round_factors[11] = 0.01; + + // THINK: Considering using error magnitudes in this scenario + + + float round_factor = round_factors[error_scale]; + INFO("round_factor = %f \n", round_factor); + + hostToDeviceCopy(x); + + int blockSize = 128; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + // NOTE: Check if a large gridSize will work with really large tensors + vecConstMul<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems); + //vecRound<<<gridSize, blockSize>>>((float*) x->gpu_data, x->num_elems); + + deviceToHostCopy(x); + randomCeilAndFloor((float*) x->host_data, x->num_elems); + hostToDeviceCopy(x); + + vecConstDiv<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems); + + Norm_t* norms = calculateNorms2(x, x_original); + + profileEvent("tensorRoundError_end", true); + + return (void*) norms; +} + + + + +// Routine for Adding Gaussian Error +void* addGaussianError(void* x_ptr, int error_scale){ + + if(error_scale > 11 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorAddError \n"); + profileEvent("tensorAddError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* bias = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + // NOTE: Error scale is used to generate the bias matrix + initRandValues(bias, error_scale); + + hostToDeviceCopy(x); + //hostToDeviceCopy(bias); + + + int 
blockSize = 1024; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + // NOTE: Check if a large gridSize will work with really large tensors + vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems); + + float alpha = 1.0f, beta = 0.0f; + + // FIXIT: routine fails for 3D tensors + checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc, + bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data)); + + + //Norm_t* norms = calculateNorms2(x, x_original); + Norm_t* norms = calculateNormsGPU(x, x_original); + + + profileEvent("tensorAddError_end", true); + + return (void*) norms; +} + + + +void* tensorAddError(void* x_ptr, int error_scale){ + + void * new_x = addGaussianError(x_ptr, error_scale); + //void * new_x = addRoundError(x_ptr, error_scale); + //void * new_x = addBitError(x_ptr, error_scale); + return new_x; +} + + + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h new file mode 120000 index 0000000000000000000000000000000000000000..a9c72af5b6737a57be1db44cd3231c6dda0857f0 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h @@ -0,0 +1 @@ +hsharif3@tyler.cs.illinois.edu.21294:1541049775 \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h new file mode 100644 index 0000000000000000000000000000000000000000..4d38c6eea4451328ca040db67250dcaeae0df94d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h @@ -0,0 +1,107 @@ + + +#ifndef RUNTIME_DEBUG +#define RUNTIME_DEBUG + +#define LOG_DEBUG 0 // Sets the debug logging to true +#define LOG_INFO 0 // Sets the info logging to true + +#include "tensor.h" + + +#define FatalError(s) do { \ + std::stringstream _where, _message; \ + _where << 
__FILE__ << ':' << __LINE__; \ + _message << std::string(s) + "\n" << __FILE__ << ':' << __LINE__; \ + std::cerr << _message.str() << "\nAborting...\n"; \ + cudaDeviceReset(); \ + exit(1); \ +} while(0) + + +#define checkCUDNN(status) do { \ + std::stringstream _error; \ + if (status != CUDNN_STATUS_SUCCESS) { \ + _error << "CUDNN failure: " << cudnnGetErrorString(status); \ + FatalError(_error.str()); \ + } \ +} while(0) + + +#define checkCudaErrors(status) do { \ + std::stringstream _error; \ + if (status != 0) { \ + _error << "Cuda failure: " << status; \ + FatalError(_error.str()); \ + } \ +} while(0) + + + +void INFO(char* format, ...){ + if(!LOG_INFO) // Don't print if logging info is disabled + return; + va_list args; + va_start(args, format); + printf("INFO: "); + vprintf(format, args); + va_end(args); +} + +void DEBUG(char* format, ...){ + if(!LOG_DEBUG) // Don't print if logging info is disabled + return; + va_list args; + va_start(args, format); + printf("DEBUG: "); + vprintf(format, args); + va_end(args); +} + +void ERROR(char* format, ...){ + if(!LOG_DEBUG) // Don't print if logging info is disabled + return; + va_list args; + va_start(args, format); + printf("ERROR!: "); + vprintf(format, args); + va_end(args); + + abort(); +} + + +void fillOnes(struct Tensor* tensor){ + // initialization is specific to the floating point type + if(tensor->data_type == CUDNN_DATA_FLOAT){ + float* data_arr = (float*) tensor->host_data; + for(unsigned int i = 0; i < tensor->num_elems; i++){ + data_arr[i] = 1.0; + } + } +} + + +void printTensorDescInfo(struct Tensor* tensor){ + + cudnnDataType_t dType; + int nStride, cStride, hStride, wStride; + int size1, size2, size3, size4; + cudnnGetTensor4dDescriptor(tensor->tensor_desc, + &dType, + &size1, &size2, &size3, &size4, + &nStride, &cStride, &hStride, &wStride); + + DEBUG("dType = %d, size1 = %d, size2 = %d, size3 = %d, size4 = %d \n", + dType, size1, size2, size3, size4); + + DEBUG("nStride = %d, cStride = %d, hStride = 
%d, wStride = %d \n", + nStride, cStride, hStride, wStride); + +} + + + + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h new file mode 100644 index 0000000000000000000000000000000000000000..e0b0a87eab74a39566cb6864dac5b85e705034db --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h @@ -0,0 +1,630 @@ + +#ifndef ERROR_HEADER +#define ERROR_HEADER + + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <ctime> +#include <cfloat> +#include <algorithm> +#include <sstream> +#include <vector> +#include <iostream> +#include <random> +#include <string> +#include <time.h> + +#include <curand.h> +#include <curand_kernel.h> +#include <math.h> +#include <assert.h> + + +#include "../include/debug.h" +#include "tensor.h" +#include "profiling.h" +#include "tensor_utils.cu" +#include "global_data.h" + + + + +void readOpenTunerFlags(char* file_name){ + + total_ops = 0; + op_counter = 0; + op_accuracies.clear(); + + + FILE* fp = fopen(file_name, "r"); + if(fp == NULL){ + ERROR("File 'opentuner_flags' not found \n"); + } + + int retVal = 200; + while(retVal != EOF){ + + int op_acc; + if(fp != NULL) + retVal = fscanf(fp, "%d", &op_acc); + else + op_acc = 0; + + op_accuracies.push_back(op_acc); + //printf("op_accuracies = %d, total_ops =%d \n", op_accuracies[total_ops], total_ops); + total_ops++; + } + + fclose(fp); +} + + + + + +/*__device__ inline void atomicAdd(float* address, float value) + +{ + + float old = value; + float new_old; + + do{ + new_old = atomicExch(address, 0.0f); + new_old += old; + } + + while ((old = atomicExch(address, new_old))!=0.0f); + +}; +*/ + + + + + +Norm_t* calculateNorms(Tensor* x, Tensor* x_orig){ + + deviceToHostCopy(x); + deviceToHostCopy(x_orig); + + // NOTE: Move floats to doubles - overflow is quite possible + float l1_norm = 0.0; + float l2_norm = 0.0; + float inf_norm 
= -1.0; + double total = 0.0; + + float* arr1 = (float*) x->host_data; + float* arr2 = (float*) x_orig->host_data; + + for(unsigned int i = 0; i < x->num_elems; i++){ + + total = total + arr2[i]; + + float diff = abs(arr1[i] - arr2[i]); + l1_norm += diff; + l2_norm += (arr1[i] - arr2[i]) * (arr1[i] - arr2[i]); + + if(inf_norm < diff) + inf_norm = diff; + } + + l1_norm = l1_norm / (x->num_elems * 1.0); + l2_norm = l2_norm / (x->num_elems * 1.0); + + double distribution_mean = total / (x->num_elems * 1.0); + l1_norm = l1_norm / distribution_mean; + l2_norm = l2_norm / distribution_mean; + + + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + norms->l1_norm = l1_norm; + norms->l2_norm = l2_norm; + norms->inf_norm = inf_norm; + + INFO("l1_norm = %f \n", l1_norm); + INFO("l2_norm = %f \n", l2_norm); + INFO("inf_norm = %f \n", inf_norm); + + return norms; +} + + + +Norm_t* calculateNorms2(Tensor* x, Tensor* x_orig){ + + deviceToHostCopy(x); + deviceToHostCopy(x_orig); + + // NOTE: Move all floats to doubles - overflow is quite possible + double l0_norm_A = 0.0; + double l0_norm_B = 0.0; + + double l1_norm_A = 0.0; + double l1_norm_B = 0.0; + + double l2_norm_A = 0.0; + double l2_norm_B = 0.0; + float inf_norm = -1.0; + float orig_inf_norm = -1.0; + double total_diff = 0.0; + double total_diff_squared = 0.0; + + float* arr1 = (float*) x->host_data; + float* arr2 = (float*) x_orig->host_data; + + for(unsigned int i = 0; i < x->num_elems; i++){ + + if(arr2[i] != 0.0) + l0_norm_A = l0_norm_A + 1.0; + if(arr1[i] != 0.0) + l0_norm_B = l0_norm_B + 1.0; + + l1_norm_A = l1_norm_A + abs(arr2[i]); + l1_norm_B = l1_norm_B + abs(arr1[i]); + + l2_norm_A = l2_norm_A + (arr2[i] * arr2[i]); + l2_norm_B = l2_norm_B + (arr1[i] * arr1[i]); + + float diff = abs(arr1[i] - arr2[i]); + total_diff = total_diff + diff; + float diff_squared = diff * diff; + total_diff_squared = total_diff_squared + diff_squared; + + + if(orig_inf_norm < diff){ + orig_inf_norm = diff; + } + + // Relative 
difference value + float normalized_diff = diff / arr2[i]; + if(inf_norm < normalized_diff){ + inf_norm = normalized_diff; + } + } + + // Relative L1 and Mean L1 norms of the difference Matrix + float mean_l1 = ( total_diff ) / x->num_elems; + float relative_l1 = ( total_diff ) / l1_norm_A; + + // Computing Relative L2 norm - i.e., Euclidean distance + double norm_root_A = sqrt(l2_norm_A); + double diff_root = sqrt(total_diff_squared); + float mean_l2 = diff_root / x->num_elems; + float relative_l2 = diff_root / norm_root_A; + + // Packing computed norms in Norm_t struct + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware + norms->mean_l1 = mean_l1; + norms->mean_l2 = mean_l2; + norms->orig_inf_norm = orig_inf_norm; + + // Relative metrics (relative to distribution) - suitable for PROMISE + norms->l1_norm = relative_l1; + norms->l2_norm = relative_l2; + norms->inf_norm = inf_norm; + + INFO("l1_norm = %f \n", relative_l1); + INFO("l2_norm = %f \n", relative_l2); + INFO("inf_norm = %f \n", inf_norm); + + return norms; +} + + + + + +__global__ void normComputeKernel(float* A, float * B, double* l1_A, double* l2_A, + double* l1_diff, double* l2_diff, unsigned int n){ + + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if(i < n){ + + double diff = fabsf(A[i] - B[i]); + double diff_squared = diff * diff; + + atomicAdd( l1_A, fabsf(A[i]) ); + atomicAdd( l2_A, (A[i] * A[i]) ); + + atomicAdd( l1_diff, diff); + atomicAdd( l2_diff, diff_squared); + } +} + + + +// Compute Norms on the GPU +Norm_t* calculateNormsGPU(Tensor* x, Tensor* x_orig){ + + hostToDeviceCopy(x); + hostToDeviceCopy(x_orig); + + // FIXIT: Move all floats to doubles - overflow is possible + + double l1_norm_A; + double l2_norm_A; + + double l1_diff; + double l2_diff; + + // Device pointers + double *l1_norm_A_d; + double *l2_norm_A_d; + double *l1_diff_d; + double *l2_diff_d; + + cudaMalloc( (void**) 
&l1_norm_A_d, sizeof(double)); + cudaMalloc( (void**) &l2_norm_A_d, sizeof(double)); + cudaMalloc( (void**) &l1_diff_d, sizeof(double)); + cudaMalloc( (void**) &l2_diff_d, sizeof(double)); + + + float* arr1 = (float*) x->gpu_data; + float* arr2 = (float*) x_orig->gpu_data; + + int blockSize = 1024; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + normComputeKernel<<<gridSize, blockSize>>>(arr1, arr2, l1_norm_A_d, l2_norm_A_d, l1_diff_d, l2_diff_d, x->num_elems); + + cudaMemcpy(&l1_norm_A, l1_norm_A_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l2_norm_A, l2_norm_A_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l1_diff, l1_diff_d, sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(&l2_diff, l2_diff_d, sizeof(double), cudaMemcpyDeviceToHost); + + + // Relative L1 and Mean L1 norms of the difference Matrix + float mean_l1 = l1_diff / x->num_elems; + float relative_l1 = l1_diff / l1_norm_A; + + // Computing Relative L2 norm - i.e., Euclidean distance + double norm_root_A = sqrt(l2_norm_A); + double diff_root = sqrt(l2_diff); + float mean_l2 = diff_root / x->num_elems; + float relative_l2 = diff_root / norm_root_A; + + // Packing computed norms in Norm_t struct + Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t)); + // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware + norms->mean_l1 = mean_l1; + norms->mean_l2 = mean_l2; + norms->orig_inf_norm = 0.0; + + // Relative metrics (relative to distribution) - suitable for PROMISE + norms->l1_norm = relative_l1; + norms->l2_norm = relative_l2; + norms->inf_norm = 0.0; + + INFO("l1_norm = %f \n", relative_l1); + INFO("l2_norm = %f \n", relative_l2); + + return norms; +} + + + + +__global__ void vecConstMul(float* A, float mul_factor, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = A[id] * mul_factor; +} + + +__global__ void vecRound(float* A, 
int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = roundf(A[id]); +} + + +__global__ void vecConstDiv(float* A, float div_factor, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + A[id] = A[id] / div_factor; +} + + + +__global__ void vecMul(float* A, float* B, int n){ + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id < n) + B[id] = A[id] * B[id]; +} + + +/**** ERROR injecion routines ******/ + +void initRandValues(Tensor* bias, int error_scale){ + + float scaling_values[20]; + + // FIXIT: Error knob 0 should be 0 zero + scaling_values[0] = 0.016; + scaling_values[1] = 0.018; + scaling_values[2] = 0.022; + scaling_values[3] = 0.026; + scaling_values[4] = 0.030; + scaling_values[5] = 0.035; + scaling_values[6] = 0.04; + scaling_values[7] = 0.06; + scaling_values[8] = 0.08; + scaling_values[9] = 0.1; + //scaling_values[8] = 0.15; + //scaling_values[9] = 0.2; + scaling_values[10] = 0.25; + scaling_values[11] = 0.3; + scaling_values[12] = 0.35; + scaling_values[13] = 0.4; + scaling_values[14] = 0.45; + // Values below are currently unused by Opentuner + scaling_values[15] = 0.5; + scaling_values[16] = 0.55; + scaling_values[17] = 0.6; + scaling_values[18] = 0.65; + scaling_values[19] = 0.7; + + + curandGenerator_t gen; + + struct timespec ts; + + if(timespec_get(&ts, TIME_UTC) == 0){ + printf("crashed \n"); + abort(); + } + + curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); + + curandSetPseudoRandomGeneratorSeed(gen, ts.tv_nsec^ts.tv_sec); + + curandGenerateNormal(gen, (float*) bias->gpu_data, bias->num_elems, 0.0, 1.0 * scaling_values[error_scale]); + + + /* + std::random_device rd; + std::mt19937 mt(rd()); + std::normal_distribution<float> distribution(0.0, 1.0); + + float* data_arr = (float*) bias->host_data; + for(int i = 0; i < bias->num_elems; i++){ + float rand_num = distribution(mt); + data_arr[i] = scaling_values[error_scale] * rand_num; + } + */ + +} + + + +void* addBitError(void* 
x_ptr, int error_scale){ + + if(error_scale > 6 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorBitError \n"); + profileEvent("tensorBitError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + // Quadratic Error + float freq_factors[6]; + freq_factors[0] = 0.1; + freq_factors[1] = 0.2; + freq_factors[2] = 0.4; + freq_factors[3] = 0.6; + freq_factors[4] = 0.8; + freq_factors[5] = 1.0; + + float error_freq = freq_factors[error_scale]; + + deviceToHostCopy(x); + + unsigned char* data_arr = reinterpret_cast<unsigned char*>(x->host_data); + // FIXIT: Need to be careful about floating point datatype assumptions + int size_of_elem = 4; + + long int total_bytes = x->size_in_bytes; + long int error_iterations = total_bytes * 0.01 * error_freq; + INFO("total_bytes = %lu, error_iterations = %lu \n", total_bytes, error_iterations); + + srand(time(NULL)); + + for(int i = 0; i < error_iterations; i++){ + // FIXIT: The rand() is only specific to int - need long + long int index = rand() % total_bytes; + int N = 5; // The operation below flips the Nth bit + unsigned char fil = 1UL << N; + unsigned char val = data_arr[index]; + char flipped = val^fil; + data_arr[i] = flipped; + } + + + Norm_t* norms = calculateNorms2(x, x_original); + + profileEvent("tensorBitError_end", true); + + return (void*) norms; + +} + + +void randomCeilAndFloor(float* x, size_t num_elems){ + + INFO("randomCeilAndFloor\n"); + + std::random_device rd; + std::mt19937 mt(rd()); + std::normal_distribution<float> distribution(0.0, 1.0); + + for(size_t i = 0; i < num_elems; i++){ + float rand_num = distribution(mt); + int val = abs(((int) rand_num) % 2); + if(val == 0) + x[i] = floor(x[i]); + else if(val == 1) + x[i] = 
ceil(x[i]); + } + +} + +// Routine for Adding RoundOff Errors +void* addRoundError(void* x_ptr, int error_scale){ + + if(error_scale > 11 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorRoundError \n"); + profileEvent("tensorRoundError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + float round_factors[12]; + round_factors[0] = 1000000; // FIXIT: This should be zero error + round_factors[1] = 100; + round_factors[2] = 10; + round_factors[3] = 7; // Beyond this point, the error function is linear + round_factors[4] = 3; + round_factors[5] = 1; + round_factors[6] = 0.7; + round_factors[7] = 0.3; + round_factors[8] = 0.1; + round_factors[9] = 0.07; + round_factors[10] = 0.03; + round_factors[11] = 0.01; + + // THINK: Considering using error magnitudes in this scenario + + + float round_factor = round_factors[error_scale]; + INFO("round_factor = %f \n", round_factor); + + hostToDeviceCopy(x); + + int blockSize = 128; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + // NOTE: Check if a large gridSize will work with really large tensors + vecConstMul<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems); + //vecRound<<<gridSize, blockSize>>>((float*) x->gpu_data, x->num_elems); + + deviceToHostCopy(x); + randomCeilAndFloor((float*) x->host_data, x->num_elems); + hostToDeviceCopy(x); + + vecConstDiv<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems); + + Norm_t* norms = calculateNorms2(x, x_original); + + profileEvent("tensorRoundError_end", true); + + return (void*) norms; +} + + + + +// Routine for Adding Gaussian Error +void* 
addGaussianError(void* x_ptr, int error_scale){ + + if(error_scale > 11 || error_scale < 0){ + ERROR("Error Scale out of bounds \n"); + } + + INFO("*** TensorAddError \n"); + profileEvent("tensorAddError"); + + Tensor* x = (Tensor*) x_ptr; + + size_t* dim_sizes = x->dims.dim_sizes; + Tensor* bias = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format, + dim_sizes[0], dim_sizes[1], + dim_sizes[2], dim_sizes[3]); + + // Copying x data into x_original - for computing Norms + tensorCopy(x, x_original); + + // NOTE: Error scale is used to generate the bias matrix + initRandValues(bias, error_scale); + + hostToDeviceCopy(x); + //hostToDeviceCopy(bias); + + + int blockSize = 1024; + int gridSize = (int) ceil ((float) x->num_elems / blockSize); + INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize); + + // NOTE: Check if a large gridSize will work with really large tensors + vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems); + + float alpha = 1.0f, beta = 0.0f; + + // FIXIT: routine fails for 3D tensors + checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc, + bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data)); + + + //Norm_t* norms = calculateNorms2(x, x_original); + Norm_t* norms = calculateNormsGPU(x, x_original); + + + profileEvent("tensorAddError_end", true); + + return (void*) norms; +} + + + +void* tensorAddError(void* x_ptr, int error_scale){ + + void * new_x = addGaussianError(x_ptr, error_scale); + //void * new_x = addRoundError(x_ptr, error_scale); + //void * new_x = addBitError(x_ptr, error_scale); + return new_x; +} + + + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h new file mode 100644 index 
0000000000000000000000000000000000000000..252427c65379aa977237652eb4435e685dbc3403 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h @@ -0,0 +1,114 @@ +// Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This code modified from the public domain code here: +// https://gist.github.com/rygorous/2156668 +// The URL above includes more robust conversion routines +// that handle Inf and NaN correctly. 
+// +// It is recommended to use the more robust versions in production code. + +typedef unsigned uint; + +union FP32 +{ + uint u; + float f; + struct + { + uint Mantissa : 23; + uint Exponent : 8; + uint Sign : 1; + }; +}; + +union FP16 +{ + unsigned short u; + struct + { + uint Mantissa : 10; + uint Exponent : 5; + uint Sign : 1; + }; +}; + +// Approximate solution. This is faster but converts some sNaNs to +// infinity and doesn't round correctly. Handle with care. +// Approximate solution. This is faster but converts some sNaNs to +// infinity and doesn't round correctly. Handle with care. +static half approx_float_to_half(float fl) +{ + FP32 f32infty = { 255 << 23 }; + FP32 f16max = { (127 + 16) << 23 }; + FP32 magic = { 15 << 23 }; + FP32 expinf = { (255 ^ 31) << 23 }; + uint sign_mask = 0x80000000u; + FP16 o = { 0 }; + + FP32 f = *((FP32*)&fl); + + uint sign = f.u & sign_mask; + f.u ^= sign; + + if (!(f.f < f32infty.u)) // Inf or NaN + o.u = f.u ^ expinf.u; + else + { + if (f.f > f16max.f) f.f = f16max.f; + f.f *= magic.f; + } + + o.u = f.u >> 13; // Take the mantissa bits + o.u |= sign >> 16; + return *((half*)&o); +} + +// from half->float code - just for verification. +static float half_to_float(half hf) +{ + FP16 h = *((FP16*)&hf); + + static const FP32 magic = { 113 << 23 }; + static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift + FP32 o; + + o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits + uint exp = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp == shifted_exp) // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + else if (exp == 0) // Zero/Denormal? 
+ { + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.u & 0x8000) << 16; // sign bit + return o.f; +} \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h new file mode 100644 index 0000000000000000000000000000000000000000..64aee8231b54d52710192fc7d598d6ed162f1338 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h @@ -0,0 +1,274 @@ +/* + * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// Conversion from/to 16-bit floating point (half-precision). + +#if !defined(_FP16_EMU_H_) +#define _FP16_EMU_H_ + +#include <driver_types.h> +#include <cuda_fp16.h> + +// Necessary to ensure visibility of CUDART_VERSION macro +#include <cuda_runtime_api.h> + +// Definition of '__half_raw' was not provided before CUDA 9.0. +// '__half_raw' is our type where the unsigned 16-bit integer +// data member 'x' can be accessed in both CUDA 9.0 and 8.0. +#if CUDART_VERSION < 9000 +typedef __half __half_raw; +#endif + +// Internally, in CUDNN we use half1 struct as the FP16 type. 
+typedef __half half1; + +#define HLF_EPSILON 4.887581E-04 +#define HLF_MIN 6.103516E-05 +#define HLF_MAX 6.550400E+04 + +half1 cpu_float2half_rn(float f); + +float cpu_half2float(half1 h); + +static __inline__ __device__ __host__ half1 habs(half1 h) +{ + __half_raw hr = reinterpret_cast<__half_raw&>(h); + hr.x &= 0x7fffU; + return reinterpret_cast<half1&>(hr); +} + +static __inline__ __device__ __host__ half1 hneg(half1 h) +{ + __half_raw hr = reinterpret_cast<__half_raw&>(h); + hr.x ^= 0x8000U; + return reinterpret_cast<half1&>(hr); +} + +static __inline__ __device__ __host__ int ishnan(half1 h) +{ + // When input is NaN, exponent is all ones and mantissa is non-zero. + __half_raw hr = reinterpret_cast<__half_raw&>(h); + return (hr.x & 0x7c00U) == 0x7c00U && (hr.x & 0x03ffU) != 0; +} + +static __inline__ __device__ __host__ int ishinf(half1 h) +{ + // When input is +/- inf, exponent is all ones and mantissa is zero. + __half_raw hr = reinterpret_cast<__half_raw&>(h); + return (hr.x & 0x7c00U) == 0x7c00U && (hr.x & 0x03ffU) == 0; +} + +static __inline__ __device__ __host__ int ishequ(half1 x, half1 y) +{ + __half_raw xr = reinterpret_cast<__half_raw&>(x); + __half_raw yr = reinterpret_cast<__half_raw&>(y); + return ishnan(x) == 0 && ishnan(y) == 0 && xr.x == yr.x; +} + +// Returns 0.0000 in FP16 binary form +static __inline__ __device__ __host__ half1 hzero() +{ + __half_raw hr; + hr.x = 0x0000U; + return reinterpret_cast<half1&>(hr); +} + +// Returns 1.0000 in FP16 binary form +static __inline__ __device__ __host__ half1 hone() +{ + __half_raw hr; + hr.x = 0x3c00U; + return reinterpret_cast<half1&>(hr); +} + +// Returns quiet NaN, the most significant fraction bit #9 is set +static __inline__ __device__ __host__ half1 hnan() +{ + __half_raw hr; + hr.x = 0x7e00U; + return reinterpret_cast<half1&>(hr); +} + +// Largest positive FP16 value, corresponds to 6.5504e+04 +static __inline__ __device__ __host__ half1 hmax() +{ + // Exponent all ones except LSB (0x1e), 
mantissa is all ones (0x3ff) + __half_raw hr; + hr.x = 0x7bffU; + return reinterpret_cast<half1&>(hr); +} + +// Smallest positive (normalized) FP16 value, corresponds to 6.1035e-05 +static __inline__ __device__ __host__ half1 hmin() +{ + // Exponent is 0x01 (5 bits), mantissa is all zeros (10 bits) + __half_raw hr; + hr.x = 0x0400U; + return reinterpret_cast<half1&>(hr); +} + + + + + + + + + + + +#define STATIC_ASSERT(cond) do { typedef char compile_time_assert[(cond) ? 1 : -1]; } while (0) + +// Host functions for converting between FP32 and FP16 formats +// Paulius Micikevicius (pauliusm@nvidia.com) + +half1 cpu_float2half_rn(float f) +{ + unsigned x = *((int*)(void*)(&f)); + unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1; + unsigned sign, exponent, mantissa; + + __half_raw hr; + + // Get rid of +NaN/-NaN case first. + if (u > 0x7f800000) { + hr.x = 0x7fffU; + return reinterpret_cast<half1&>(hr); + } + + sign = ((x >> 16) & 0x8000); + + // Get rid of +Inf/-Inf, +0/-0. + if (u > 0x477fefff) { + hr.x = sign | 0x7c00U; + return reinterpret_cast<half1&>(hr); + } + if (u < 0x33000001) { + hr.x = sign | 0x0000U; + return reinterpret_cast<half1&>(hr); + } + + exponent = ((u >> 23) & 0xff); + mantissa = (u & 0x7fffff); + + if (exponent > 0x70) { + shift = 13; + exponent -= 0x70; + } else { + shift = 0x7e - exponent; + exponent = 0; + mantissa |= 0x800000; + } + lsb = (1 << shift); + lsb_s1 = (lsb >> 1); + lsb_m1 = (lsb - 1); + + // Round to nearest even. 
+ remainder = (mantissa & lsb_m1); + mantissa >>= shift; + if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) { + ++mantissa; + if (!(mantissa & 0x3ff)) { + ++exponent; + mantissa = 0; + } + } + + hr.x = (sign | (exponent << 10) | mantissa); + + return reinterpret_cast<half1&>(hr); +} + + +float cpu_half2float(half1 h) +{ + STATIC_ASSERT(sizeof(int) == sizeof(float)); + + __half_raw hr = reinterpret_cast<__half_raw&>(h); + + unsigned sign = ((hr.x >> 15) & 1); + unsigned exponent = ((hr.x >> 10) & 0x1f); + unsigned mantissa = ((hr.x & 0x3ff) << 13); + + if (exponent == 0x1f) { /* NaN or Inf */ + mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0); + exponent = 0xff; + } else if (!exponent) { /* Denorm or Zero */ + if (mantissa) { + unsigned int msb; + exponent = 0x71; + do { + msb = (mantissa & 0x400000); + mantissa <<= 1; /* normalize */ + --exponent; + } while (!msb); + mantissa &= 0x7fffff; /* 1.mantissa is implicit */ + } + } else { + exponent += 0x70; + } + + int temp = ((sign << 31) | (exponent << 23) | mantissa); + + return reinterpret_cast<float&>(temp); +} + + + + + + + +#endif // _FP16_EMU_H_ + + + + + + diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..d2b4108ec1b49c6e4bd6b040e9cf2fb82143f129 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu @@ -0,0 +1,272 @@ +#include <iostream> +#include <string> +#include <cublas_v2.h> +#include <cuda_fp16.h> +#include "fp16_emu.h" + +inline cudaError_t checkCuda(cudaError_t result) { + if (result != cudaSuccess) + std::cerr << "CUDA Runtime Error: " << cudaGetErrorString(result) << "\n"; + return result; +} + +inline cublasStatus_t checkCublas(cublasStatus_t result) { + if (result != CUBLAS_STATUS_SUCCESS) + std::cerr << "cuBLAS Error: " << result << "\n"; + return result; +} + +template <typename T> 
+inline void printArray(const T * const __restrict__ array, + const unsigned elements) { + for (unsigned i = 0; i < elements; i++) + std::cout << std::to_string(array[i]) << "\n"; +} + +// initialization +template <typename T> +__global__ void initKernel(T * const __restrict__ array, + const unsigned elements) { + const unsigned idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < elements) + array[idx] = 1.2; +} + +template <typename T> +void init(T * const __restrict__ array, + const unsigned elements) { + const unsigned block_size = 512; + const unsigned num_blocks = (elements + block_size - 1) / block_size; + initKernel<<<num_blocks, block_size>>>(array, elements); + checkCuda(cudaDeviceSynchronize()); +} + +// float to half +__global__ void f2hKernel(const float * const __restrict__ input, + const unsigned elements, + half * const __restrict__ output) { + const unsigned idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < elements) + output[idx] = __float2half_rn(input[idx]); +} + +void f2h(const float * const __restrict__ input, + const unsigned elements, + half * const __restrict__ output) { + const unsigned block_size = 512; + const unsigned num_blocks = (elements + block_size - 1) / block_size; + f2hKernel<<<num_blocks, block_size>>>(input, elements, output); + checkCuda(cudaDeviceSynchronize()); +} + +// half to float +__global__ void h2fKernel(const half * const __restrict__ input, + const unsigned elements, + float * const __restrict__ output) { + const unsigned idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < elements) + output[idx] = __half2float(input[idx]); +} + +void h2f(const half * const __restrict__ input, + const unsigned elements, + float * const __restrict__ output) { + const unsigned block_size = 512; + const unsigned num_blocks = (elements + block_size - 1) / block_size; + h2fKernel<<<num_blocks, block_size>>>(input, elements, output); + checkCuda(cudaDeviceSynchronize()); +} + +void sgemm(const float * const __restrict__ a, 
+ const unsigned num_rows_a, + const unsigned num_cols_a, + const float * const __restrict__ b, + const unsigned num_rows_b, + const unsigned num_cols_b, + float * const __restrict__ c) { + const unsigned iterations = 10; + float kernel_time; + cudaEvent_t start; + cudaEvent_t stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cublasHandle_t handle; + checkCublas(cublasCreate(&handle)); + + // Enable Tensor Cores + checkCublas(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH)); + + const float alpha_ = 1.0; + const float beta_ = 0.0; + const float *alpha = &alpha_; + const float *beta = &beta_; + + cudaEventRecord(start, 0); + for (unsigned i = 0; i < iterations; i++) { + checkCublas(cublasGemmEx(handle, + CUBLAS_OP_N, + CUBLAS_OP_N, + // Dimensions + num_rows_a, + num_cols_b, + num_cols_a, + alpha, + // A + a, + CUDA_R_32F, + num_rows_a, + // B + b, + CUDA_R_32F, + num_rows_b, + beta, + // C + c, + CUDA_R_32F, + num_rows_a, + // Compute precision and algorithm + CUDA_R_32F, + CUBLAS_GEMM_DEFAULT_TENSOR_OP)); + } + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&kernel_time, start, stop); + + std::cout << "FP32 GEMM: " << std::to_string(kernel_time / iterations) << " ms\n"; +} + +void hgemm(const float * const __restrict__ af, + const unsigned num_rows_a, + const unsigned num_cols_a, + const float * const __restrict__ bf, + const unsigned num_rows_b, + const unsigned num_cols_b, + float * const __restrict__ cf) { + const unsigned iterations = 10; + + const unsigned num_elements_a = num_rows_a * num_cols_a; + const unsigned num_elements_b = num_rows_b * num_cols_b; + const unsigned num_elements_c = num_rows_a * num_cols_b; + + float to_fp16_time; + float to_fp32_time; + float kernel_time; + float total_time; + + cudaEvent_t start; + cudaEvent_t stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + half *a; + half *b; + half *c; + + checkCuda(cudaMallocManaged(&a, sizeof(half) * num_elements_a)); + 
checkCuda(cudaMallocManaged(&b, sizeof(half) * num_elements_b)); + checkCuda(cudaMallocManaged(&c, sizeof(half) * num_elements_c)); + + init(a, num_elements_a); + init(b, num_elements_b); + init(c, num_elements_c); + + // Convert floats to halfs + cudaEventRecord(start, 0); + f2h(af, num_elements_a, a); + f2h(bf, num_elements_b, b); + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&to_fp16_time, start, stop); + + cublasHandle_t handle; + checkCublas(cublasCreate(&handle)); + checkCublas(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH)); + + const half alpha_ = cpu_float2half_rn(1.0); + const half beta_ = cpu_float2half_rn(0.0); + const half *alpha = &alpha_; + const half *beta = &beta_; + + cudaEventRecord(start, 0); + for (unsigned i = 0; i < iterations; i++) { + checkCublas(cublasGemmEx(handle, + CUBLAS_OP_N, + CUBLAS_OP_N, + // Dimensions + num_rows_a, + num_cols_b, + num_cols_a, + alpha, + // A + a, + CUDA_R_16F, + num_rows_a, + // B + b, + CUDA_R_16F, + num_rows_b, + beta, + // C + c, + CUDA_R_16F, + num_rows_a, + // Compute precision and algorithm + CUDA_R_16F, + CUBLAS_GEMM_DEFAULT_TENSOR_OP)); + } + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&kernel_time, start, stop); + + cudaEventRecord(start, 0); + h2f(c, num_elements_c, cf); + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&to_fp32_time, start, stop); + + total_time = to_fp16_time + (kernel_time / iterations) + to_fp32_time; + std::cout << "FP16 GEMM: " << std::to_string(total_time) << " ms\n"; + std::cout << "\tTo FP16: " << std::to_string(to_fp16_time) << " ms\n"; + std::cout << "\tKernel : " << std::to_string(kernel_time / iterations) << " ms\n"; + std::cout << "\tTo FP32: " << std::to_string(to_fp32_time) << " ms\n"; +} + + +/*int main() { + const unsigned num_rows_a = 5000 * 14 * 14; + const unsigned num_cols_a = 800; + const unsigned num_rows_b = num_cols_a; + const unsigned num_cols_b = 64; + + 
const unsigned num_elements_a = num_rows_a * num_cols_a; + const unsigned num_elements_b = num_rows_b * num_cols_b; + const unsigned num_elements_c = num_rows_a * num_cols_b; + + float *a; + float *b; + float *c; + + checkCuda(cudaMallocManaged(&a, sizeof(float) * num_elements_a)); + checkCuda(cudaMallocManaged(&b, sizeof(float) * num_elements_b)); + checkCuda(cudaMallocManaged(&c, sizeof(float) * num_elements_c)); + + init(a, num_elements_a); + init(b, num_elements_b); + init(c, num_elements_c); + + // FP32 + sgemm(a, num_rows_a, num_cols_a, b, num_rows_b, num_cols_b, c); + printArray(c, 16); + + // FP16 + hgemm(a, num_rows_a, num_cols_a, b, num_rows_b, num_cols_b, c); + printArray(c, 16); + + checkCuda(cudaFree(a)); + checkCuda(cudaFree(b)); + checkCuda(cudaFree(c)); + + return 0; +} +*/ diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h new file mode 100644 index 0000000000000000000000000000000000000000..35cca55ac2a8b8e60f30d0e9b310a3b3b6edcc82 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h @@ -0,0 +1,39 @@ + +#ifndef GLOBAL_DATA_HEADER +#define GLOBAL_DATA_HEADER + + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> + +#include <cuda_runtime.h> +#include <device_launch_parameters.h> + +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> + + +#define ERROR_INJECTION_ENABLED 0 + +#ifdef NO_INJECTION +#undef ERROR_INJECTION_ENABLED +#endif + + +/* Data declarations */ +cudnnHandle_t cudnnHandle; +cublasHandle_t cublasHandle; + +int op_counter = 0; +int total_ops = 0; +std::vector<int> op_accuracies; + +std::vector<void*> tensors_ptr; +std::vector<void*> host_ptr; +std::vector<void*> obj_ptr; + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h new file mode 100644 
index 0000000000000000000000000000000000000000..01e312efe41be74b593b47e2655df29c00043e6e --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h @@ -0,0 +1,547 @@ + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <ctime> +#include <cfloat> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <sstream> +#include <string> +#include <vector> + +#include <cuda_runtime.h> +#include <device_launch_parameters.h> + +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <cuda_fp16.h> +#include <driver_types.h> + + +// Tensor runtime header files +#include "../include/tensor_runtime.h" +#include "../include/tensor_utils.cu" +#include "../include/debug.h" +#include "../include/profiling.h" +#include "../include/global_data.h" +#include "../include/tensor.h" +#include "../include/fp16_gemm.cu" + + + +void* tensorHalfGemm(void* lhs_ptr, void* rhs_ptr){ + + INFO("*** TensorHalfGemm \n"); + profileEvent("tensorHalfGemm"); + + Tensor* lhs = (Tensor*) lhs_ptr; + Tensor* rhs = (Tensor*) rhs_ptr; + + INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); + INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); + + hostToDeviceCopy(lhs); + hostToDeviceCopy(rhs); + + size_t* lhs_dims = lhs->dims.dim_sizes; + size_t* rhs_dims = rhs->dims.dim_sizes; + + profileEvent("F2H_start"); + + Tensor* lhs_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + lhs_dims[0], lhs_dims[1], lhs_dims[2], + lhs_dims[3]); + changeTensorPlacement(lhs_half, DEVICE); + + Tensor* rhs_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + rhs_dims[0], rhs_dims[1], rhs_dims[2], + rhs_dims[3]); + changeTensorPlacement(rhs_half, DEVICE); + + + f2h((float*) lhs->gpu_data, lhs->num_elems, (half*) lhs_half->gpu_data); + f2h((float*) rhs->gpu_data, rhs->num_elems, (half*) rhs_half->gpu_data); + + profileEvent("F2H_end"); + + + // 'm' 
holds the batch dimension - assuming NCHW format Tensors + int m = lhs->dims.dim_sizes[0]; + // The rhs last dimension must contain the neurons + int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons + int k = 1; + + for (int j = 1 ; j < lhs->dims.num_dims; j++){ + k = k * lhs->dims.dim_sizes[j]; // input neurons + } + + int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; + // Dimension-note: Check if k is same across the two tensors + INFO("m = %d, n = %d, k = %d \n", m, n, k); + if(rhs_k != k){ + ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); + } + + // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines + Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + m, n, 1, 1); + Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + m, n, 1, 1); + + // Changing output tensor placement from host to device + changeTensorPlacement(output_half, DEVICE); + changeTensorPlacement(output, DEVICE); + + // Enabing tensor core ops for efficient half precision + + //--- checkCudaErrors(cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH)); + + // INFO: cuBlas uses column-major format + // INFO: The leading dimension is just the FIRST Dimension + // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects + const __half alf = approx_float_to_half(1.0); + const __half bet = approx_float_to_half(0.0); + const __half *alpha_half = &alf; + const __half *beta_half = &bet; + + + checkCudaErrors(cublasGemmEx(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, + n, m, k, + alpha_half, + (__half*) rhs_half->gpu_data, CUDA_R_16F, n, + (__half*) lhs_half->gpu_data, CUDA_R_16F, k, + beta_half, + (__half*) output_half->gpu_data, CUDA_R_16F, n, + CUDA_R_16F, CUBLAS_GEMM_DEFAULT_TENSOR_OP) ); + + + profileEvent("H2F_start"); + h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data); + profileEvent("H2F_end"); + + + 
profileEvent("tensorHalfGemm_end"); + + + freeTensor(lhs_half); + freeTensor(rhs_half); + freeTensor(output_half); + + + return output; +} + + + + + + + +// FIXIT: Generalize all of the routines for types {half, float, double} +void* tensorHalfConvolution(void* input_ptr, void* filter_ptr, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision){ + + INFO("*** TensorHConvolution \n"); + profileEvent("tensorHalfConv"); + + Tensor* input = (Tensor*) input_ptr; + Tensor* filter = (Tensor*) filter_ptr; + + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t convAlgo; + cudnnConvolutionMode_t mode; + if(conv_mode == 0) + mode = CUDNN_CONVOLUTION; + else if(conv_mode == 1) + mode = CUDNN_CROSS_CORRELATION; + + // FIXIT: Need to be more aware of the implications of alpha and beta + float alpha = 1.0f, beta = 0.0f; + // NOTE: compute in half precision + cudnnDataType_t computeType = CUDNN_DATA_HALF; + + // NOTE: Moving inputs to GPU global memory + hostToDeviceCopy(input); + hostToDeviceCopy(filter); + + + /***** CONVERSIONS from FP32 to FP16 - on the GPU */ + size_t* input_dims = input->dims.dim_sizes; + size_t* filter_dims = filter->dims.dim_sizes; + + + profileEvent("F2H_start"); + + Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + input_dims[0], input_dims[1], + input_dims[2], input_dims[3]); + changeTensorPlacement(input_half, DEVICE); + Tensor* filter_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + filter_dims[0], filter_dims[1], + filter_dims[2], filter_dims[3]); + changeTensorPlacement(filter_half, DEVICE); + + + f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data); + f2h((float*) filter->gpu_data, filter->num_elems, (half*) filter_half->gpu_data); + + profileEvent("F2H_end"); + + /******* END OF INPUT DATA CONVERSIONS*/ + + + + checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); + // FIXIT: Think 
if upscaling values need to be configurable? + // IMP-FIXIT: CUDNN Cross correlation is only used in the Lenet context + // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used? + checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc, + vertical_pad, horizontal_pad, // conv padding + vertical_stride, horizontal_stride, // conv strides + 1, 1, // upscaling values + mode, // mode is configurable + computeType)); // defines compute precision + + int n, c, h, w; // output dimensions + // Find dimension of convolution output + checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc, + input->tensor_desc, + filter->filter_desc, + &n, &c, &h, &w)); + DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w); + + + Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type, + CUDNN_TENSOR_NCHW, n, c, h, w); + // FIXIT: more checks for data types needed + Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, + CUDNN_TENSOR_NCHW, n, c, h, w); + + // NOTE: Changing output tensor placement from host to device + changeTensorPlacement(output, DEVICE); + // NOTE: Necessary to insert the above call for every output tensor + + DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %d, H = %d, W = %d, C = %d \n", + output->data_type, output->data_format, output->dims.dim_sizes[0], output->dims.dim_sizes[1], + output->dims.dim_sizes[2], output->dims.dim_sizes[3]); + + if(convDesc == NULL || input->tensor_desc == NULL || + filter->filter_desc == NULL || output->tensor_desc == NULL) + ERROR("NULL descriptor! 
\n"); + + + // NOTE: The following algo works with TRUE half precision + convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; + + size_t workspace_size; + checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle, + input_half->tensor_desc, + filter_half->filter_desc, + convDesc, + output_half->tensor_desc, + convAlgo, + &workspace_size)); + + // Allocating memory for the convolution workspace + DEBUG("workspace size = %d \n", workspace_size); + void* workspace; + checkCudaErrors(cudaMalloc(&workspace, workspace_size)); + + + checkCUDNN(cudnnConvolutionForward(cudnnHandle, + &alpha, + input_half->tensor_desc, + input_half->gpu_data, + filter_half->filter_desc, + filter_half->gpu_data, + convDesc, convAlgo, workspace, workspace_size, + &beta, + output_half->tensor_desc, + output_half->gpu_data)); + + + profileEvent("H2F_start"); + + // NOTE: Transforming half precision output to single precision + h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data); + + profileEvent("H2F_end"); + + profileEvent("tensorHalfConv_end"); + + + freeTensor(input_half); + freeTensor(filter_half); + freeTensor(output_half); + + return output; +} + + + + + + +void* tensorHalfPooling(void* input_ptr, + int poolFunction, + int window_height, int window_width, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride){ + + + INFO("*** TensorHalfPooling \n"); + profileEvent("tensorHalfPooling"); + + Tensor* input = (Tensor*) input_ptr; + size_t* input_dims = input->dims.dim_sizes; + + /** floating point to half conversion */ + profileEvent("F2H_start"); + Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + input_dims[0], input_dims[1], + input_dims[2], input_dims[3]); + changeTensorPlacement(input_half, DEVICE); + + f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data); + + profileEvent("F2H_end"); + //*** end of data conversions + + cudnnPoolingDescriptor_t poolDesc; + // 
FIXIT: Need to be more aware of the implications of alpha and beta + float alpha = 1.0f, beta = 0.0f; + + hostToDeviceCopy(input); + + checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc)); + + int n = input->dims.dim_sizes[0]; + int c = input->dims.dim_sizes[1]; + int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride; + h = h + 1; + int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride; + w = w + 1; + + DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w); + + // FIXIT: Don't be specific to floats + Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w); + // Changing output tensor placement from host to device + changeTensorPlacement(output, DEVICE); + + // FIXIT: more checks for data types needed + Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, + CUDNN_TENSOR_NCHW, n, c, h, w); + + + + // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format + // FIXIT: Is this setTensor even needed? 
+ checkCUDNN(cudnnSetTensor4dDescriptor(output_half->tensor_desc, + CUDNN_TENSOR_NCHW, + CUDNN_DATA_HALF, + n, c, + h, w)); + + // FIXIT: Make the pool function (max, min, avg) configurable + checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc, + CUDNN_POOLING_MAX, + CUDNN_PROPAGATE_NAN, + window_height, window_width, + vertical_pad, horizontal_pad, + vertical_stride, horizontal_stride)); + + checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input_half->tensor_desc, + input_half->gpu_data, &beta, output_half->tensor_desc, output_half->gpu_data)); + + + + profileEvent("H2F_start"); + + // NOTE: Transforming half precision output to single precision + h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data); + + profileEvent("H2F_end"); + + profileEvent("tensorHalfPooling_end", true); + + + freeTensor(input_half); + freeTensor(output_half); + + + return output; +} + + + + + +void* tensorHalfRelu2(void* input_ptr, float min, float max){ + + INFO("*** TensorClippedRelu \n"); + profileEvent("tensorHalfClippedRelu"); + + Tensor* input = (Tensor*) input_ptr; + size_t* input_dims = input->dims.dim_sizes; + + cudnnActivationDescriptor_t reluDesc; + float alpha = 1.0f, beta = 0.0f; + hostToDeviceCopy(input); + + + //**** Floating point to half conversions + profileEvent("F2H_start"); + Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + input_dims[0], input_dims[1], + input_dims[2], input_dims[3]); + changeTensorPlacement(input_half, DEVICE); + + // Data conversion from float to half + f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data); + + profileEvent("F2H_end"); + /*** End of data type conversion **/ + + + checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc)); + + checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU, + CUDNN_PROPAGATE_NAN, 2.0)); + + checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha, + input_half->tensor_desc, 
input_half->gpu_data, &beta, + input_half->tensor_desc, input_half->gpu_data)); + + + profileEvent("H2F_start"); + // NOTE: Transforming half precision output to single precision + h2f((half*) input_half->gpu_data, input->num_elems, (float*) input->gpu_data); + + profileEvent("H2F_end"); + + profileEvent("tensorHalfClippedRelu_end"); + + + freeTensor(input_half); + + return input; +} + + + + + + +void* tensorHalfTanh(void* input_ptr){ + + INFO("*** TensorHalfTanh \n"); + profileEvent("tensorHalfTanh"); + + + Tensor* input = (Tensor*) input_ptr; + size_t* input_dims = input->dims.dim_sizes; + + cudnnActivationDescriptor_t tanhDesc; + float alpha = 1.0f, beta = 0.0f; + hostToDeviceCopy(input); + + + //**** Data conversion from float to half + profileEvent("F2H_start"); + Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + input_dims[0], input_dims[1], + input_dims[2], input_dims[3]); + changeTensorPlacement(input_half, DEVICE); + + f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data); + + profileEvent("F2H_end"); + /**** End of data type conversion ****/ + + + checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc)); + + checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH, + CUDNN_PROPAGATE_NAN, 0.0)); + + checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha, + input_half->tensor_desc, input_half->gpu_data, &beta, + input_half->tensor_desc, input_half->gpu_data)); + + profileEvent("H2F_start"); + // NOTE: Transforming half precision output to single precision + h2f((half*) input_half->gpu_data, input->num_elems, (float*) input->gpu_data); + profileEvent("H2F_end"); + + profileEvent("tensorHalfTanh_end"); + + + freeTensor(input_half); + + return input; +} + + + +void* tensorHalfAdd(void* x_ptr, void* bias_ptr){ + + Tensor* x = (Tensor*) x_ptr; + Tensor* bias = (Tensor*) bias_ptr; + + INFO("*** TensorHalfAdd \n"); + profileEvent("tensorHalfAdd"); + + float alpha = 1.0f, beta = 0.0f; + 
hostToDeviceCopy(x); + hostToDeviceCopy(bias); + + size_t* x_dims = x->dims.dim_sizes; + size_t* bias_dims = bias->dims.dim_sizes; + + + //**** Data conversion from float to half + profileEvent("F2H_start"); + Tensor* x_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + x_dims[0], x_dims[1], x_dims[2], + x_dims[3]); + changeTensorPlacement(x_half, DEVICE); + + Tensor* bias_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, + bias_dims[0], bias_dims[1], bias_dims[2], + bias_dims[3]); + changeTensorPlacement(bias_half, DEVICE); + + f2h((float*) x->gpu_data, x->num_elems, (half*) x_half->gpu_data); + f2h((float*) bias->gpu_data, bias->num_elems, (half*) bias_half->gpu_data); + + profileEvent("F2H_end"); + /*** End of data type conversions ****/ + + + // FIXIT: routine fails for 3D tensors + checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias_half->tensor_desc, + bias_half->gpu_data, &alpha, x_half->tensor_desc, x_half->gpu_data)); + + + profileEvent("H2F_start"); + // NOTE: Transforming half precision output to single precision + h2f((half*) x_half->gpu_data, x->num_elems, (float*) x->gpu_data); + profileEvent("H2F_end"); + + profileEvent("tensorHalfAdd_end"); + + + freeTensor(x_half); + freeTensor(bias_half); + + return x; +} diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h new file mode 100644 index 0000000000000000000000000000000000000000..5ea0b28545cbd81a59735d0b06c839a7f991ed35 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h @@ -0,0 +1,229 @@ + + +#ifndef OP_OVERHEADS_HEADER +#define OP_OVERHEADS_HEADER + + +#include <math.h> +#include <sstream> +#include "tensor.h" + + +float scale_down_factor = 10000.0; +std::string result_str = ""; + + +// TODO: Every routine needs testing + +static float scaleDownComps(double total_comps){ + + total_comps = total_comps / scale_down_factor; + return 
total_comps;
+}
+
+// private function
+//
+// Scales a computation count down according to an error level.
+//   total_comps : computation count to scale
+//   error_scale : error level; larger values give a smaller result
+//   factor_type : 1 = logarithmic  (divide by log2(error_scale + 3))
+//                 2 = linear       (divide by error_scale + 1)
+//                 3 = quadratic    (divide by (error_scale + 1)^2)
+// Any other factor_type returns total_comps unscaled (previously the result
+// was read from an uninitialised variable).
+static float getScaledComps(double total_comps, int error_scale, int factor_type){
+
+  // Start from the unscaled cost so every path returns a defined value
+  double scaled_comps = total_comps;
+
+  if(factor_type == 1){
+    // Logarithmic error factor scaling - higher error, lower cost
+    float error_factor = log2((float) error_scale + 3);
+    scaled_comps = total_comps / error_factor;
+  }
+  else if(factor_type == 2){
+    // Linear error factor scaling
+    scaled_comps = total_comps / (error_scale + 1);
+  }
+  else if(factor_type == 3){
+    // Quadratic error factor scaling (scaling down)
+    error_scale = (error_scale + 1) * (error_scale + 1);
+    scaled_comps = total_comps / error_scale;
+  }
+
+  return scaled_comps;
+}
+
+
+// Appends one fixed-notation value followed by a tab to the global result_str
+static void addNormToResult(float comps){
+
+  std::ostringstream ss;
+  ss << std::fixed << comps;
+
+  result_str.append( std::string(ss.str()) );
+  result_str.append("\t");
+}
+
+
+
+// Appends one result row to the global result_str: the total count and the
+// three scaled counts, tab-separated in fixed notation, ended by a newline.
+static void addCompsToResult(float total_comps, float opt_comps1, float opt_comps2, float opt_comps3){
+
+  std::ostringstream ss;
+  ss << std::fixed << total_comps;
+  result_str.append( std::string(ss.str()) );
+  result_str.append("\t");
+
+  std::ostringstream ss2;
+  ss2 << std::fixed << opt_comps1;
+  result_str.append( std::string(ss2.str()) );
+  result_str.append("\t");
+
+  std::ostringstream ss3;
+  ss3 << std::fixed << opt_comps2;
+  result_str.append( std::string(ss3.str()) );
+  result_str.append("\t");
+
+  std::ostringstream ss4;
+  ss4 << std::fixed << opt_comps3;
+  result_str.append( std::string(ss4.str()) );
+  result_str.append("\n");
+}
+
+
+void dumpCompOverheads(double total_comps, int error_scale){
+
+  total_comps = scaleDownComps(total_comps);
+
+  float scaled_comps1 = getScaledComps(total_comps, error_scale, 1); // Log scaling
+  float scaled_comps2 = getScaledComps(total_comps, error_scale, 2); // Linear scaling
+  float scaled_comps3 = getScaledComps(total_comps, error_scale, 3); // Quadratic scaling
+
+  //INFO("error_scale = %d, total_comps = %f, scaled_comps = %f \n",
+  //	error_scale, total_comps, scaled_comps1);
+
+  
addCompsToResult(total_comps, scaled_comps1, scaled_comps2, scaled_comps3);
+}
+
+
+
+// Records the estimated cost of a convolution as
+//   batch * (H / vertical_stride) * (W / horizontal_stride) * (product of the four filter dims)
+// and appends it (with its error-scaled variants) to the result string.
+// NOTE(review): H / stride and W / stride are evaluated as integer divisions
+// before being widened to double - confirm the truncation is intended.
+void add_conv_overheads(void* input_ptr, void* filter_ptr,
+			int vertical_stride, int horizontal_stride,
+			int error_scale){
+
+  Tensor* input = (Tensor*) input_ptr;
+  Tensor* filter = (Tensor*) filter_ptr;
+
+  double kernel_comps = filter->dims.dim_sizes[0] * filter->dims.dim_sizes[1] *
+    filter->dims.dim_sizes[2] * filter->dims.dim_sizes[3];
+
+  double H_in = input->dims.dim_sizes[2] / vertical_stride;
+  double W_in = input->dims.dim_sizes[3] / horizontal_stride;
+  double N_in = input->dims.dim_sizes[0]; // batch Dimension
+
+  double total_comps = N_in * H_in * W_in * kernel_comps;
+
+  dumpCompOverheads(total_comps, error_scale);
+
+}
+
+
+// Records the estimated cost (m * n * k) of a matrix multiply, where m is the
+// first lhs dimension, n the last rhs dimension and k the flattened remainder
+// of lhs. Aborts the process when k does not match the second-to-last rhs
+// dimension.
+void add_gemm_overheads(void* lhs_ptr, void* rhs_ptr, int error_scale){
+
+  Tensor* lhs = (Tensor*) lhs_ptr;
+  Tensor* rhs = (Tensor*) rhs_ptr;
+
+  int m = lhs->dims.dim_sizes[0];
+  // The rhs last dimension must contain the neurons
+  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
+  int k = 1;
+
+  // Flattening the dimensions after the batch dimension
+  for (int j = 1 ; j < lhs->dims.num_dims; j++){
+    k = k * lhs->dims.dim_sizes[j]; // input neurons
+  }
+
+  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
+  // Dimension-note: Check if k is same across the two tensors
+
+  //printf("m = %d, n = %d, k = %d \n", m, n, k);
+
+  if(rhs_k != k){
+    // abort() is not required to flush buffered streams, so report on
+    // (unbuffered) stderr with a terminating newline to keep the message.
+    fprintf(stderr, "rhs=%d and lhs=%d columns/rows don't match\n", rhs_k, k);
+    abort();
+  }
+
+  double m_d = m;
+  double n_d = n;
+  double rhs_k_d = rhs_k;
+
+  double total_comps = m_d * n_d * rhs_k_d * 1.0;
+  dumpCompOverheads(total_comps, error_scale);
+
+}
+
+
+// Records one computation per tensor element for a bias addition
+void add_bias_overheads(void* input_ptr, int error_scale){
+
+  Tensor* input = (Tensor*) input_ptr;
+  double total_comps = input->num_elems;
+
+  dumpCompOverheads(total_comps, error_scale);
+ }
+
+
+void add_relu_overheads(void* input_ptr, int error_scale){
+
+  Tensor* input = (Tensor*) input_ptr;
+  double total_comps = 
input->num_elems; + + dumpCompOverheads(total_comps, error_scale); +} + + +float add_pool_overheads(void* input_ptr, int kernel_size, + int stride_size, int error_scale){ + + Tensor* input = (Tensor*) input_ptr; + + int num_dims = input->dims.num_dims; + double H = input->dims.dim_sizes[num_dims-2]; + double W = input->dims.dim_sizes[num_dims-1]; + double C = input->dims.dim_sizes[1]; // channel dimension + double N = input->dims.dim_sizes[0]; // batch dimension + + H = H / stride_size; + W = W / stride_size; + + double total_comps = N * C * H * W * kernel_size * kernel_size; + + dumpCompOverheads(total_comps, error_scale); + +} + + +void add_norms(void* norms_ptr, char* op_name, int error_value){ + + // Print operation name - {tensorAdd, tensorPool, tensorGemm} + result_str.append(op_name); + result_str.append("\t"); + + addNormToResult(error_value); + + Norm_t* norms = (Norm_t*) norms_ptr; + + addNormToResult(norms->mean_l1); + addNormToResult(norms->mean_l2); + addNormToResult(norms->orig_inf_norm); + + addNormToResult(norms->l1_norm); + addNormToResult(norms->l2_norm); + addNormToResult(norms->inf_norm); +} + + +void dump_result(char* file_name){ + + FILE* fp = fopen(file_name, "w+"); + if(fp != NULL){ + fwrite(result_str.c_str(), 1, result_str.length(), fp); + fclose(fp); + } + else{ + ERROR("Could not create file \n"); + } + + result_str = ""; +} + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h new file mode 100644 index 0000000000000000000000000000000000000000..8eb7f92ab014c00adb9b89875c375d82546c9f38 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h @@ -0,0 +1,93 @@ + +#ifndef PROFILING_HEADER +#define PROFILING_HEADER + + + +#include <stdio.h> +#include <stdarg.h> +#include <ctime> +#include <chrono> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <string> +#include <unordered_map> + 
+
+
+/***** Profiling routines ***/
+
+// Number of times each event name has been profiled so far
+std::unordered_map<std::string, int> func_counters;
+// Accumulated profile text, one line per event, flushed by stopProfiling()
+std::string profile_data = "";
+
+std::chrono::time_point<std::chrono::high_resolution_clock> start_time;
+// previous_time maintains time for the latest timed operation
+std::chrono::time_point<std::chrono::high_resolution_clock> previous_time;
+
+extern "C"{
+
+  // Stamps the start of a profiling scope
+  void startProfiling(){
+    start_time = std::chrono::high_resolution_clock::now();
+  }
+
+  // Writes the collected profile lines to "profile_data.txt" (if the file
+  // can be opened) and resets the collected text and the event counters.
+  void stopProfiling(){
+
+    FILE* out = fopen("profile_data.txt", "w+");
+    if(out){
+      fwrite(profile_data.data(), 1, profile_data.size(), out);
+      fclose(out);
+    }
+
+    profile_data.clear();
+    func_counters.clear();
+  }
+
+
+  // Records one profile line for event_name: the name suffixed with its
+  // occurrence count, the time since the clock's epoch in seconds and, when
+  // compare_previous is set, the seconds elapsed since the previous event.
+  void profileEvent(char* event_name, bool compare_previous = false){
+
+    typedef std::chrono::high_resolution_clock hr_clock;
+    typedef std::chrono::duration<double> seconds_d;
+
+    // operator[] value-initialises a missing counter to 0, so the first
+    // occurrence of an event is numbered 1
+    const int occurrence = ++func_counters[event_name];
+    const std::string event_count = std::to_string(occurrence);
+
+    const hr_clock::time_point epoch;  // default-constructed: the clock's epoch
+    const hr_clock::time_point time_reading = hr_clock::now();
+    const seconds_d current_time = time_reading - epoch;
+
+    INFO("AbsoluteTime, Event = %s, Time = %f \n", event_name, current_time.count());
+    profile_data += event_name;
+    profile_data += event_count;
+    profile_data += "\t";
+    profile_data += std::to_string(current_time.count());
+
+    if(compare_previous){
+      const seconds_d duration_time = time_reading - previous_time;
+
+      profile_data += "\t";
+      profile_data += std::to_string(duration_time.count());
+      INFO("TimeDuration, Event = %s, Time = %f \n", event_name, duration_time.count());
+    }
+
+    profile_data += "\n";
+
+    previous_time = time_reading; // set the previous time reading 
to the current profiled time + } + +} + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..08b4369fca5fbc28fc2b3c3dbe31fa81e85e7ff6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h @@ -0,0 +1,53 @@ + + +#ifndef TENSOR_HEADER +#define TENSOR_HEADER + +#include <cuda_runtime.h> +#include <device_launch_parameters.h> +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <cuda_fp16.h> +#include <driver_types.h> + + + +struct Norm_t{ + float mean_l1; + float mean_l2; + float orig_inf_norm; + float l0_norm; + float l1_norm; + float l2_norm; + float inf_norm; +}; + + +struct Dimension{ + int num_dims; + size_t* dim_sizes; +}; + +enum data_location_t{ + HOST, + DEVICE +}; + + +struct Tensor{ + int data_type; + int data_format; + data_location_t data_placement; // Maintains the location of the tensor {host, device...} + cudnnTensorDescriptor_t tensor_desc; + cudnnFilterDescriptor_t filter_desc; // FIXIT: Rethink if this should be in tensor struct + void* host_data; + void* gpu_data; // The pointers should not be device specific per se - TODO: Better design needed + size_t num_elems; // Total elements + size_t size_in_bytes; // Total size in bytes + struct Dimension dims; +}; + + +#endif + diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc new file mode 100644 index 0000000000000000000000000000000000000000..2216172eab78414b46814e0d457908f5584c606a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc @@ -0,0 +1,114 @@ + +#include <stdio.h> +#include <cstdlib> +#include <cmath> +#include <memory> +#include <string> + + +#ifndef CUDNN_HEADER +#define CUDNN_HEADER + + +extern "C"{ + /**** Initialization Routine - Must be 
inserted at program start (in the backend) ****/ + void llvm_hpvm_initTensorRt(int gpuid = 0); + void llvm_hpvm_cleanupTensorRt(); + + // Routine to moving tensor data (from and to GPU,CPU) + void hpvm_request_tensor(void* tensor, int destination); + + /****** Profiling API - defines profiling scope */ + void startProfiling(); + void stopProfiling(); + + /****** Routines for tensor creation and initialization *******/ + void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size); + void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size, + size_t dim3_size); + + // NOTE: Currently only using 4-D tensors - 2D and 3D tensors not supported for cuDNN operations + // NOTE: The only data format supported as of now is: CUDNN_NCHW + void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size, + size_t dim3_size, size_t dim4_size); + void initTensorData(void* tensor, void* data_ptr, size_t size_in_bytes); + + /********** Tensor Operation API ******/ + + void** tensorSplit(void* tensor, int num_splits, int split_dim); + void* tensorConcat(void** tensors, int num_splits, int split_dim); + + // NOTE: For conv_mode, only value '1' is supported + void* tensorConvolution(void* input, void* filter, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision); + void* tensorHConvolution(void* input, void* filter, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision); + + void* tensorPooling(void* input, + int poolFunction, + int window_height, int window_width, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride); + + void* tensorLRN(void* input, unsigned int LRN_window, + double LRN_alpha, double LRN_beta, double LRN_k); + + + /* 4 different Gemm versions */ + void* tensorGemm(void* lhs, void* rhs); + void* tensorGemmCPU(void* lhs, void* rhs); + void* 
tensorGemmGPU(void* lhs, void* rhs); + void* tensorHgemm(void* lhs, void* rhs); + + + // NOTE: In-place operation + void* tensorGemmBias(void* input, void* bias); + // NOTE: In place operation + void* tensorAdd(void* x, void* bias); + // NOTE: In-place operation + void* tensorRelu(void* input); + // NOTE: In-place operation + void* tensorSoftmax(void* input); + + /* Error injection API - used for accuracy tuning */ + void* tensorAddError(void* x_ptr); +} + + + +void emptyFunction(){ + + void* initRT = (void*) &llvm_hpvm_initTensorRt; + void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt; + void* request_tensorPtr = (void*) &hpvm_request_tensor; + void* startProf = (void*) &startProfiling; + void* stopProf = (void*) &stopProfiling; + void* create2Dptr = (void*) &create2DTensor; + void* create3Dptr = (void*) &create3DTensor; + void* create4Dptr = (void*) &create4DTensor; + void* initTensorPtr = (void*) &initTensorData; + void* tensorSplitPtr = (void*) &tensorSplit; + void* tensorConcatPtr = (void*) &tensorConcat; + void* tensorConvPtr = (void*) &tensorConvolution; + void* tensorHConvPtr = (void*) &tensorHConvolution; + void* tensorPoolPtr = (void*) &tensorPooling; + void* tensorLRNPtr = (void*) &tensorLRN; + void* tensorGemmPr = (void*) &tensorGemm; + void* tensorGemmCPUPtr = (void*) &tensorGemmCPU; + void* tensorGemmGPUPtr = (void*) &tensorGemmGPU; + void* tensorHgemmPtr = (void*) &tensorHgemm; + void* tensorGemmBiasPtr = (void*) &tensorGemmBias; + void* tensorAddPtr = (void*) &tensorAdd; + void* tensorReluPtr = (void*) &tensorRelu; + void* tensorSoftmaxPtr = (void*) &tensorSoftmax; + void* tensorAddErrorPtr = (void*) &tensorAddError; + +} + + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..527d88c77bdec82e61bc37e411d79ec7485208ca --- /dev/null +++ 
b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h @@ -0,0 +1,142 @@ + +#include <stdio.h> +#include <cstdlib> +#include <cmath> +#include <memory> +#include <string> + + +#ifndef CUDNN_HEADER +#define CUDNN_HEADER + + +extern "C"{ + /**** Initialization Routine - Must be inserted at program start (in the backend) ****/ + void llvm_hpvm_initTensorRt(int gpuid = 0); + void llvm_hpvm_cleanupTensorRt(); + + // Routine to moving tensor data (from and to GPU,CPU) + void hpvm_request_tensor(void* tensor, int destination); + + /****** Profiling API - defines profiling scope */ + void startProfiling(); + void stopProfiling(); + + /****** Routines for tensor creation and initialization *******/ + void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size); + void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size, + size_t dim3_size); + + // NOTE: Currently only using 4-D tensors - 2D and 3D tensors not supported for cuDNN operations + // NOTE: The only data format supported as of now is: CUDNN_NCHW + void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size, + size_t dim3_size, size_t dim4_size); + void initTensorData(void* tensor, void* data_ptr, size_t size_in_bytes); + + void freeTensor(void*); + + /********** Tensor Operation API ******/ + + void** tensorSplit(void* tensor, int num_splits, int split_dim); + void* tensorConcat(void** tensors, int num_splits, int split_dim); + + // NOTE: For conv_mode, only value '1' is supported + void* tensorConvolution(void* input, void* filter, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision); + void* tensorHalfConvolution(void* input, void* filter, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision); + + void* tensorPooling(void* input, + int poolFunction, + int window_height, int window_width, + int 
vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride); + + void* tensorHalfPooling(void* input, + int poolFunction, + int window_height, int window_width, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride); + + + void* tensorLRN(void* input, unsigned int LRN_window, + double LRN_alpha, double LRN_beta, double LRN_k); + + + /* 4 different Gemm versions */ + void* tensorGemm(void* lhs, void* rhs); + void* tensorGemmCPU(void* lhs, void* rhs); + void* tensorGemmGPU(void* lhs, void* rhs); + void* tensorHalfGemm(void* lhs, void* rhs); + + + // NOTE: In-place operation + void* tensorGemmBias(void* input, void* bias); + // NOTE: In place operation + void* tensorAdd(void* x, void* bias); + // NOTE: In place operation + void* tensorHalfAdd(void* x, void* bias); + // NOTE: In-place operation + void* tensorRelu(void* input); + // NOTE: In-place operation + + void* tensorTanh(void* input); + // NOTE: In-place operation + void* tensorHalfTanh(void* input); + + // NOTE: In-place operation + void* tensorRelu2(void* input, float min, float max); + // NOTE: In-place operation + void* tensorHalfRelu2(void* input, float min, float max); + // NOTE: In-place operation + void* tensorSoftmax(void* input); + + /* Error injection API - used for accuracy tuning */ + void* tensorAddError(void* x_ptr, int error_scale); +} + + +void dumpAccuracyNorms(); +void readOpenTunerFlags(char* file_name); +void clearOpCounter(); +void clearTensorMap(); +void freeOutputTensors(); + + +/*void emptyFunction(){ + + void* initRT = (void*) &llvm_hpvm_initTensorRt; + void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt; + void* request_tensorPtr = (void*) &hpvm_request_tensor; + void* startProf = (void*) &startProfiling; + void* stopProf = (void*) &stopProfiling; + void* create2Dptr = (void*) &create2DTensor; + void* create3Dptr = (void*) &create3DTensor; + void* create4Dptr = (void*) &create4DTensor; + void* initTensorPtr = (void*) &initTensorData; + 
void* tensorSplitPtr = (void*) &tensorSplit; + void* tensorConcatPtr = (void*) &tensorConcat; + void* tensorConvPtr = (void*) &tensorConvolution; + void* tensorHConvPtr = (void*) &tensorHConvolution; + void* tensorPoolPtr = (void*) &tensorPooling; + void* tensorLRNPtr = (void*) &tensorLRN; + void* tensorGemmPr = (void*) &tensorGemm; + void* tensorGemmCPUPtr = (void*) &tensorGemmCPU; + void* tensorGemmGPUPtr = (void*) &tensorGemmGPU; + void* tensorHgemmPtr = (void*) &tensorHgemm; + void* tensorGemmBiasPtr = (void*) &tensorGemmBias; + void* tensorAddPtr = (void*) &tensorAdd; + void* tensorReluPtr = (void*) &tensorRelu; + void* tensorSoftmaxPtr = (void*) &tensorSoftmax; + void* tensorAddErrorPtr = (void*) &tensorAddError; +} + +*/ + + + +#endif diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu new file mode 100644 index 0000000000000000000000000000000000000000..e5db155e1e6ed62beee1caec7002ba4bd099472d --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu @@ -0,0 +1,385 @@ + +#ifndef TENSOR_UTILS_HEADER +#define TENSOR_UTILS_HEADER + + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cmath> +#include <ctime> +#include <cfloat> +#include <algorithm> +#include <chrono> +#include <iomanip> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <sstream> +#include <string> + +#include <cuda_runtime.h> +#include <device_launch_parameters.h> + +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <vector> + +#include "../../dnn_sources/include/types.h" +#include "tensor_runtime.h" +#include "debug.h" +#include "tensor.h" +#include "global_data.h" + + +// used to map HPVM tensors to runtime tensors (with extra runtime-specific information) +std::vector<Tensor*> tensorsArr; +int total_tensors = 0; + + + +void 
addRuntimeTensor(struct Tensor_t* hpvm_tensor, struct Tensor* tensor){
+
+  // The id handed to the HPVM tensor is the index of its runtime tensor in tensorsArr
+  hpvm_tensor->tensor_id = total_tensors;
+  total_tensors++;
+  tensorsArr.push_back(tensor);
+}
+
+
+// Looks up the runtime tensor registered for hpvm_tensor.
+// Reports an error and returns NULL when the id is outside the registered
+// range (the original fell off the end of the function in that case and did
+// not reject negative ids).
+struct Tensor* getRuntimeTensor(struct Tensor_t* hpvm_tensor){
+  int tensor_id = hpvm_tensor->tensor_id;
+  if(tensor_id >= 0 && tensor_id < total_tensors)
+    return tensorsArr[tensor_id];
+
+  ERROR("Tensor not found in runtime. Aborting ...");
+  return NULL;
+}
+
+
+
+// Releases the host and device buffers of a tensor and removes the tensor
+// from the global bookkeeping lists, so freeOutputTensors() will not free
+// them a second time.
+//
+// NOTE: the Tensor struct itself, its dims.dim_sizes array and its cuDNN
+// descriptors are not released here; the struct stays valid with both data
+// pointers set to NULL.
+void freeTensor(void* tensor_ptr){
+
+  Tensor* tensor = (Tensor*) tensor_ptr;
+
+  // Erase-remove drops every matching entry; the previous index loop erased
+  // while advancing the index and therefore skipped the element that slid
+  // into the erased slot.
+  tensors_ptr.erase(std::remove(tensors_ptr.begin(), tensors_ptr.end(), tensor->gpu_data),
+		    tensors_ptr.end());
+
+  host_ptr.erase(std::remove(host_ptr.begin(), host_ptr.end(), tensor->host_data),
+		 host_ptr.end());
+
+  obj_ptr.erase(std::remove(obj_ptr.begin(), obj_ptr.end(), tensor_ptr),
+		obj_ptr.end());
+
+
+  cudaFree(tensor->gpu_data);
+  tensor->gpu_data = NULL;
+  free(tensor->host_data);
+  tensor->host_data = NULL;
+
+}
+
+
+// Returns the size of the target cudnn datatype
+// (4 for CUDNN_DATA_FLOAT, 2 for CUDNN_DATA_HALF, 1 for anything else)
+int getTypeSize(int data_type){
+  if(data_type == CUDNN_DATA_FLOAT)
+    return 4;
+
+  if(data_type == CUDNN_DATA_HALF)
+    return 2;
+
+  INFO("Given type = %d, expected type = %d \n", data_type, CUDNN_DATA_FLOAT);
+  return 1;
+  // TODO: Add support for more data types
+}
+
+
+// Stores element-size * num_elems in tensor->size_in_bytes
+void setSizeInBytes(struct Tensor* tensor, int data_type, size_t num_elems){
+  int type_size = getTypeSize(data_type);
+  size_t size_in_bytes = type_size * num_elems;
+  tensor->size_in_bytes = size_in_bytes;
+}
+
+
+// NOTE: Will need to extend this function to support other device allocations
+void allocateMem(struct Tensor* tensor, int data_type, size_t num_elems){
+  setSizeInBytes(tensor, data_type, num_elems);
+  tensor->data_type = data_type;
+  tensor->num_elems = num_elems;
+  tensor->host_data = (void*) malloc(tensor->size_in_bytes); // Allocate memory on the host
+  tensor->data_placement = HOST; // By default data is on the host
+  
checkCudaErrors(cudaMalloc(&tensor->gpu_data, tensor->size_in_bytes)); // Allocate memory on GPU
+
+  // Register the buffers and the tensor object so freeOutputTensors() can release them
+  tensors_ptr.push_back(tensor->gpu_data);
+  host_ptr.push_back(tensor->host_data);
+  obj_ptr.push_back(tensor);
+  //host_ptr.push_back(tensor->host_data);
+}
+
+
+// Stores the cuDNN tensor format on the tensor: 0 selects CUDNN_TENSOR_NCHW,
+// 1 selects CUDNN_TENSOR_NHWC, any other value is stored unchanged.
+void setCudnnDataFormat(struct Tensor* tensor, int data_format){
+
+  switch(data_format){
+  case 0:
+    data_format = CUDNN_TENSOR_NCHW; break;
+  case 1:
+    data_format = CUDNN_TENSOR_NHWC; break;
+
+  default:
+    break;
+  }
+
+  tensor->data_format = data_format;
+  DEBUG("tensor->data_format = %d \n", tensor->data_format);
+}
+
+
+// Creates tensor->filter_desc and initialises it as a 4D filter with the
+// tensor's data type, the given format and the four given dimensions.
+void set4DFilterDescriptor(struct Tensor* tensor, int data_format, size_t dim1_size,
+			   size_t dim2_size, size_t dim3_size, size_t dim4_size){
+
+  setCudnnDataFormat(tensor, data_format);
+
+  checkCUDNN(cudnnCreateFilterDescriptor(&tensor->filter_desc));
+
+  checkCUDNN(cudnnSetFilter4dDescriptor(tensor->filter_desc,
+					(cudnnDataType_t) tensor->data_type,
+					(cudnnTensorFormat_t) tensor->data_format,
+					dim1_size,
+					dim2_size,
+					dim3_size,
+					dim4_size));
+}
+
+
+
+// Creates tensor->tensor_desc and initialises it as a 4D tensor with the
+// tensor's data type, the given format and the four given dimensions, then
+// reads the descriptor back to log the strides cuDNN derived.
+// Both cuDNN calls are now checked, matching set4DFilterDescriptor; before,
+// a failed set/get was silently ignored.
+void set4DTensorDescriptor(struct Tensor* tensor, int data_format, size_t dim1_size,
+			   size_t dim2_size, size_t dim3_size, size_t dim4_size){
+
+  setCudnnDataFormat(tensor, data_format);
+
+  checkCUDNN(cudnnCreateTensorDescriptor(&tensor->tensor_desc));
+
+  // For certain operations, the strides may need to change - in which case the descriptor
+  // needs to be reinitialized
+  // FIXIT: Only specific to floats - make generic and test
+  checkCUDNN(cudnnSetTensor4dDescriptor(tensor->tensor_desc,
+					(cudnnTensorFormat_t) tensor->data_format, // Data format
+					(cudnnDataType_t) tensor->data_type, // Data type
+					dim1_size, dim2_size,
+					dim3_size, dim4_size));
+
+  cudnnDataType_t dType;
+  int nStride, cStride, hStride, wStride;
+  int size1, size2, size3, size4;
+  checkCUDNN(cudnnGetTensor4dDescriptor(tensor->tensor_desc,
+					&dType,
+					&size1, &size2, &size3, &size4,
+					&nStride, &cStride, &hStride, &wStride));
+
+  INFO("nStride = %d, cStride = %d, hStride = %d, 
wStride = %d \n",
+       nStride, cStride, hStride, wStride);
+}
+
+
+// FIXIT: Striding still not working - hence 2D and 3D tensor support is missing
+//
+// Creates tensor->tensor_desc and initialises it as an N-dimensional tensor
+// with fully packed strides (last dimension has stride 1, each earlier
+// stride is the product of the later dimension sizes).
+void setTensorDescriptor(struct Tensor* tensor, int num_dims,
+			 size_t* dim_sizes){
+
+  checkCUDNN(cudnnCreateTensorDescriptor(&tensor->tensor_desc));
+
+  int* strides = (int*) malloc(sizeof(int) * num_dims);
+  strides[num_dims - 1] = 1;
+  for(int i = num_dims - 2; i >= 0; i--){
+    strides[i] = strides[i+1] * dim_sizes[i+1];
+  }
+
+  for(int i = 0; i < num_dims; i++){
+    INFO("strides[%d] = %d \n", i, strides[i]);
+  }
+
+  // cuDNN takes the dimensions as int, so narrow the size_t values
+  int* const_dims = (int*) malloc(sizeof(int) * num_dims);
+  for(int j = 0 ; j < num_dims; j++){
+    const_dims[j] = (int) dim_sizes[j];
+    INFO("const_dim = %d \n", const_dims[j]);
+  }
+
+  INFO("data_type = %d, cuDNN_value = %d \n", tensor->data_type, CUDNN_DATA_FLOAT);
+  // For certain operations, the strides may need to change - in which case the descriptor
+  // needs to be reinitialized
+  checkCUDNN(cudnnSetTensorNdDescriptor(tensor->tensor_desc,
+					(cudnnDataType_t) tensor->data_type, // Data type
+					num_dims,
+					(const int*) const_dims,
+					(const int*) strides));
+
+  // The two arrays are only inputs to the call above; release them here
+  // (they were previously leaked on every call).
+  free(const_dims);
+  free(strides);
+}
+
+
+
+
+extern "C"{
+
+  // Allocates a 2D tensor (host and device buffers) of the given data type.
+  // No cuDNN descriptor is created for it.
+  void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size){
+    struct Tensor* tensor = (struct Tensor*) malloc(sizeof(Tensor));
+    size_t num_elems = dim1_size * dim2_size;
+    allocateMem(tensor, data_type, num_elems);
+    // Setting the tensor dimensions
+    size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * 2);
+    dim_sizes[0] = dim1_size;
+    dim_sizes[1] = dim2_size;
+    tensor->dims.dim_sizes = dim_sizes;
+    tensor->dims.num_dims = 2;
+
+    return tensor;
+  }
+
+
+  void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size){
+    struct Tensor* tensor = (struct Tensor*) malloc(sizeof(Tensor));
+    size_t num_elems = dim1_size * dim2_size * dim3_size;
+    allocateMem(tensor, data_type, num_elems);
+    // Setting the tensor dimensions
+    size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * 3);
+    
dim_sizes[0] = dim1_size;
+    dim_sizes[1] = dim2_size;
+    dim_sizes[2] = dim3_size;
+    tensor->dims.dim_sizes = dim_sizes;
+    tensor->dims.num_dims = 3;
+
+    return tensor;
+  }
+
+
+  // Allocates a 4D tensor (host and device buffers) of the given data type
+  // and creates both a cuDNN tensor descriptor and a cuDNN filter descriptor
+  // for it, using the same format and dimensions.
+  void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size, size_t dim4_size){
+    struct Tensor* tensor = (struct Tensor*) malloc(sizeof(Tensor));
+    size_t num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
+    allocateMem(tensor, data_type, num_elems);
+    // Setting the tensor dimensions
+    size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * 4);
+    dim_sizes[0] = dim1_size;
+    dim_sizes[1] = dim2_size;
+    dim_sizes[2] = dim3_size;
+    dim_sizes[3] = dim4_size;
+    tensor->dims.dim_sizes = dim_sizes;
+    tensor->dims.num_dims = 4;
+    // Done setting tensor dimensions
+    //setTensorDescriptor(tensor, 4, dim_sizes);
+    set4DTensorDescriptor(tensor, data_format, dim1_size, dim2_size, dim3_size, dim4_size);
+    // FIXIT: filter descriptor should be invoked only for filters
+    set4DFilterDescriptor(tensor, data_format, dim1_size, dim2_size, dim3_size, dim4_size);
+
+    return tensor;
+  }
+
+
+  // Copies size_in_bytes bytes from data_ptr into the tensor's host buffer.
+  // The size must equal the tensor's own size; on a mismatch an error is
+  // reported and nothing is copied (the copy previously ran regardless, which
+  // could overrun the host buffer if ERROR returns).
+  // NOTE(review): data_placement is not changed here, so a tensor currently
+  // marked DEVICE keeps its old device contents - confirm callers only
+  // initialise host-resident tensors.
+  void initTensorData(void* tensor_ptr, void* data_ptr, size_t size_in_bytes){
+
+    Tensor* tensor = (Tensor*) tensor_ptr;
+
+    if(tensor->size_in_bytes != size_in_bytes){
+      ERROR("The destination and source sizes don't match");
+      return;
+    }
+
+    std::memcpy(tensor->host_data, data_ptr, size_in_bytes);
+  }
+
+
+
+  // Copies the tensor's host buffer to its device buffer unless the tensor is
+  // already marked as resident on the device. The copy is checked so that
+  // placement is never switched after a failed transfer.
+  void hostToDeviceCopy(struct Tensor* tensor){
+
+    if(tensor->data_placement != DEVICE){
+      checkCudaErrors(cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes,
+				 cudaMemcpyHostToDevice));
+      INFO("Moving %d bytes from host to GPU \n", tensor->size_in_bytes);
+      tensor->data_placement = DEVICE;
+    }
+    else{
+      DEBUG("No data movement required - Data on Device \n");
+    }
+
+  }
+
+
+  void deviceToHostCopy(struct Tensor* tensor){
+
+    if(tensor->data_placement != HOST){
+      checkCudaErrors(cudaMemcpy(tensor->host_data, tensor->gpu_data, tensor->size_in_bytes,
+				 cudaMemcpyDeviceToHost));
+      INFO("Moving 
%d bytes from GPU to host \n", tensor->size_in_bytes);
+      tensor->data_placement = HOST;
+    }
+    else{
+      DEBUG("No data movement required - Data on Host \n");
+    }
+
+  }
+
+
+  // Copies the contents of srcTensor into dstTensor on whichever side (host
+  // or device) the source currently lives, and marks the destination as
+  // living on that side. Reports an error and copies nothing when the
+  // destination buffer is smaller than the source.
+  void tensorCopy(struct Tensor* srcTensor, struct Tensor* dstTensor){
+
+    // srcTensor->size_in_bytes bytes are written into dstTensor, so the
+    // destination must be at least that large
+    if(dstTensor->size_in_bytes < srcTensor->size_in_bytes){
+      ERROR("Destination tensor is smaller than the source tensor");
+      return;
+    }
+
+    if(srcTensor->data_placement == HOST){
+      memcpy(dstTensor->host_data, srcTensor->host_data, srcTensor->size_in_bytes);
+      INFO("Moving %d bytes from host to host \n", srcTensor->size_in_bytes);
+      dstTensor->data_placement = HOST;
+    }
+    else if (srcTensor->data_placement == DEVICE){
+      checkCudaErrors(cudaMemcpy(dstTensor->gpu_data, srcTensor->gpu_data, srcTensor->size_in_bytes,
+				 cudaMemcpyDeviceToDevice));
+      INFO("Moving %d bytes from GPU to GPU \n", srcTensor->size_in_bytes);
+      dstTensor->data_placement = DEVICE;
+    }
+
+  }
+
+
+  // Moves a tensor's data to the requested side if it is not already there.
+  // destination: 0 = host, 1 = GPU; any other value is ignored.
+  // Transfers are checked so placement is never switched after a failed copy.
+  void hpvm_request_tensor(void* tensor_ptr, int destination){
+
+    Tensor* tensor = (Tensor*) tensor_ptr;
+    // If destination is the host
+    if(destination == 0){
+      if(tensor->data_placement != HOST){
+	checkCudaErrors(cudaMemcpy(tensor->host_data, tensor->gpu_data, tensor->size_in_bytes,
+				   cudaMemcpyDeviceToHost));
+	DEBUG("Moving %d bytes from GPU to host \n", tensor->size_in_bytes);
+	tensor->data_placement = HOST;
+      }
+      else{
+	DEBUG("No data movement required - Data on Host \n");
+      }
+    }
+    // If destination is the GPU
+    else if(destination == 1){
+
+      if(tensor->data_placement != DEVICE){
+	checkCudaErrors(cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes,
+				   cudaMemcpyHostToDevice));
+	INFO("Moving %d bytes from host to GPU \n", tensor->size_in_bytes);
+	tensor->data_placement = DEVICE;
+      }
+      else{
+	DEBUG("No data movement required - Data on Device \n");
+      }
+    }
+
+  }
+
+}
+
+// Called from within the runtime to change the data placement
+// This routine is required to change the output data placements from host to device
+// (only the placement flag is updated; no data is moved)
+void changeTensorPlacement(struct Tensor* tensor, data_location_t data_placement){
+  tensor->data_placement = data_placement;
+}
+
+
+#endif
diff --git 
a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu new file mode 100644 index 0000000000000000000000000000000000000000..c39f844c4e87662258f8a564cadbb56962cd14b3 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -0,0 +1,999 @@ +/* This file includes the API implementation of the HPVM tensor runtime built on cublas, cudnn +** +** Author: Hashim Sharif +** Email: hsharif3@illinois.edu +*/ + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <ctime> +#include <cfloat> +#include <algorithm> +#include <chrono> +#include <iomanip> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <sstream> +#include <string> +#include <vector> + +#include <cuda_runtime.h> +#include <device_launch_parameters.h> + +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <cuda_fp16.h> +#include <driver_types.h> + + +// Tensor runtime header files +#include "../include/tensor_runtime.h" +#include "../include/tensor_utils.cu" +#include "../include/debug.h" +#include "../include/profiling.h" +#include "../include/fp16_conversion.h" +#include "../include/global_data.h" +#include "../include/error.h" +#include "../include/tensor.h" +#include "../include/op_overheads.h" +#include "../include/half_precision_api.h" + + + +//** TODOs: +// 1) Add support for dataypes beyond floats - need to fix hardcoded CUDNN_DATA_FLOAT inputs +// 2) Add a larger set of operations + + + +void llvm_hpvm_initTensorRt(int gpuid){ + // NOTE: Setting the target GPU. Can we use multiple GPUs? 
+  checkCudaErrors(cudaSetDevice(gpuid));
+  // Initializing cuDNN and cuBlas handles
+  checkCudaErrors(cublasCreate(&cublasHandle));
+  checkCUDNN(cudnnCreate(&cudnnHandle));
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  readOpenTunerFlags("opentuner_flags");
+
+  #endif
+}
+
+
+// Currently a no-op.
+// NOTE(review): the cuBLAS and cuDNN handles created at initialisation are
+// not destroyed anywhere in the code visible here - confirm whether that is
+// intentional.
+void llvm_hpvm_cleanupTensorRt(){
+
+}
+
+// Writes the accumulated accuracy results to "accuracy_summary".
+// Compiled to a no-op unless ERROR_INJECTION_ENABLED is defined.
+void dumpAccuracyNorms(){
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  dump_result("accuracy_summary");
+
+  #endif
+}
+
+
+// Returns the number of GPUs active on the platform
+int getGPUCount(){
+  int num_gpus;
+  checkCudaErrors(cudaGetDeviceCount(&num_gpus));
+  return num_gpus;
+}
+
+
+
+// Forgets every registered buffer and tensor object without freeing anything
+void clearTensorMap(){
+
+  tensors_ptr.clear();
+  host_ptr.clear();
+  obj_ptr.clear();
+}
+
+
+// Frees every registered device buffer, host buffer and tensor object.
+// Each freed entry is reset to NULL, so a second call before
+// clearTensorMap() does not hand stale pointers to cudaFree()/free().
+// Device entries were previously left dangling, unlike the other two lists.
+// NOTE: the lists themselves are not emptied here; see clearTensorMap().
+void freeOutputTensors(){
+
+  for(size_t i = 0; i < tensors_ptr.size(); i++){
+    //printf("freeing tensor \n");
+    cudaFree(tensors_ptr[i]);
+    tensors_ptr[i] = NULL;
+  }
+
+  for(size_t i = 0; i < host_ptr.size(); i++){
+    //printf("freeing tensor \n");
+    free(host_ptr[i]);
+    host_ptr[i] = NULL;
+  }
+
+  for(size_t i = 0; i < obj_ptr.size(); i++){
+    //printf("freeing tensor \n");
+    free(obj_ptr[i]);
+    obj_ptr[i] = NULL;
+  }
+}
+
+
+
+// Resets the operation counters and the recorded per-operation accuracies
+void clearOpCounter(){
+  total_ops = 0;
+  op_counter = 0;
+  op_accuracies.clear();
+}
+
+
+
+
+
+
+// FIXIT: Fix any assumptions on the NCHW format
+// TODO: benchmark split performance and check if it is prohibitively high? 
// Splits a 4D tensor into `num_splits` equally sized tensors along `split_dim`.
//
// The split is performed on the host: the input is first copied from the
// device, and each resulting tensor is filled through its host_data buffer.
// Returns a malloc'ed array of `num_splits` Tensor pointers (cast to void**),
// or NULL when the arguments are invalid and ERROR() returns.
void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){

  INFO("*** TensorSplit \n");

  Tensor* tensor = (Tensor*) tensor_ptr;

  // FIXIT: Don't be specific to 4D tensors
  // The code below reads dim_sizes[0..3], so any other rank would read out of bounds.
  if(tensor->dims.num_dims != 4){
    ERROR("tensorSplit only supports 4D tensors");
    return NULL;
  }

  if(num_splits < 1 || split_dim < 0 || split_dim > 3){
    ERROR("Invalid num_splits = %d or split_dim = %d", num_splits, split_dim);
    return NULL;
  }

  // A zero-sized split would make copy_size (and the copy stride) 0, so the
  // copy loop below would never advance.
  if(tensor->dims.dim_sizes[split_dim] / num_splits < 1){
    ERROR("Split Dimension < 1 after splitting");
    return NULL;
  }

  profileEvent("tensorSplit");

  deviceToHostCopy(tensor); // Splitting done on the host

  Tensor** splits = (Tensor**) malloc(sizeof(Tensor*) * num_splits);
  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensor->dims.num_dims);
  for(unsigned int i = 0; i < tensor->dims.num_dims; i++){
    dim_sizes[i] = tensor->dims.dim_sizes[i];
  }

  dim_sizes[split_dim] = tensor->dims.dim_sizes[split_dim] / num_splits;

  // Bytes in one contiguous chunk: everything from split_dim inwards
  size_t copy_size = getTypeSize(tensor->data_type);
  for(unsigned int i = split_dim; i < tensor->dims.num_dims; i++){
    copy_size = copy_size * dim_sizes[i];
  }

  // Consecutive chunks of the same split are this far apart in the source
  size_t copy_stride = (size_t) num_splits * copy_size;

  for(int i = 0; i < num_splits; i++){
    // NOTE: Using same data format (NHWC/NCHW) for the split tensors
    INFO("dim_sizes[0] = %zu, dim_sizes[1] = %zu, dim_sizes[2] = %zu, dim_sizes[3] = %zu \n",
         dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);

    Tensor* split = (Tensor*) create4DTensor(tensor->data_type, tensor->data_format,
                                             dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);

    size_t copy_start = (size_t) i * copy_size;
    INFO("copy_size = %zu, copy_start = %zu, copy_stride = %zu, tensor->size_in_bytes = %zu \n",
         copy_size, copy_start, copy_stride, (size_t) tensor->size_in_bytes);

    // Gather every num_splits-th chunk, starting at chunk i, into the split
    size_t index = 0;
    while(copy_start + copy_size <= tensor->size_in_bytes){
      memcpy(((char*) split->host_data + (index * copy_size)),
             ((char*) tensor->host_data + copy_start),
             copy_size);
      copy_start += copy_stride;
      index++;
    }

    splits[i] = split;
  }

  free(dim_sizes); // scratch copy of the dimensions, no longer needed

  profileEvent("tensorSplit_end", true);

  return (void**) splits;
}


// Concatenates `num_splits` 4D tensors along `split_dim` (inverse of tensorSplit).
//
// The concatenation is performed on the host: all inputs are copied from the
// device first and the result is written through output->host_data.
// The output shape is taken from tensors[0] with dimension `split_dim`
// multiplied by `num_splits`.
// NOTE(review): the inputs are assumed to share the shape/type of tensors[0];
// this is not checked here.
// Returns the new tensor, or NULL when the arguments are invalid and ERROR() returns.
void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){

  INFO("*** TensorConcat \n");

  Tensor** tensors = (Tensor**) tensors_ptr;

  if(num_splits < 1 || split_dim < 0 || split_dim > 3){
    ERROR("Invalid num_splits = %d or split_dim = %d", num_splits, split_dim);
    return NULL;
  }

  // FIXIT: Don't be specific to 4D tensors
  // The code below reads dim_sizes[0..3], so any other rank would read out of bounds.
  if(tensors[0]->dims.num_dims != 4){
    ERROR("tensorConcat only supports 4D tensors");
    return NULL;
  }

  profileEvent("tensorConcat");

  for(int i = 0; i < num_splits; i++){
    deviceToHostCopy(tensors[i]); // Concatenation done on the host
  }

  // The no of dimensions of concatenated tensor are the same
  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensors[0]->dims.num_dims);
  for(unsigned int i = 0; i < tensors[0]->dims.num_dims; i++){
    dim_sizes[i] = tensors[0]->dims.dim_sizes[i];
  }

  // Bytes in one contiguous chunk of a single input (split_dim inwards)
  size_t copy_size = getTypeSize(tensors[0]->data_type);
  for(unsigned int i = split_dim; i < tensors[0]->dims.num_dims; i++){
    copy_size = copy_size * dim_sizes[i];
  }

  dim_sizes[split_dim] = dim_sizes[split_dim] * num_splits;
  if(dim_sizes[split_dim] < 1)
    ERROR("Split Dimension < 1 after concat");

  Tensor* output = (Tensor*) create4DTensor(tensors[0]->data_type, tensors[0]->data_format,
                                            dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);

  INFO("dim_sizes[0] = %zu, dim_sizes[1] = %zu, dim_sizes[2] = %zu, dim_sizes[3] = %zu \n",
       dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);

  // Number of chunks each input contributes: product of the outer dimensions
  size_t num_copies = 1;
  for(int i = 0; i < split_dim; i++){
    num_copies = num_copies * dim_sizes[i];
  }

  size_t copy_stride = (size_t) num_splits * copy_size;
  INFO("copy_size = %zu, num_copies = %zu, copy_stride = %zu, output->size_in_bytes = %zu \n",
       copy_size, num_copies, copy_stride, (size_t) output->size_in_bytes);

  // Interleave: chunk i of input j lands at slot (i * num_splits + j) of the output
  for(size_t i = 0; i < num_copies; i++){
    size_t copy_start = i * copy_stride;

    for(int j = 0; j < num_splits; j++){
      struct Tensor* split = tensors[j];
      memcpy(((char*) output->host_data + copy_start + (j * copy_size)),
             ((char*) split->host_data + (i * copy_size)),
             copy_size);
    }
  }

  free(dim_sizes); // scratch copy of the dimensions, no longer needed

  profileEvent("tensorConcat_end", true);

  return output;
}



// Applies cuDNN cross-channel local response normalization to `input_ptr`.
//
// LRN_window, LRN_alpha, LRN_beta and LRN_k are forwarded unchanged to
// cudnnSetLRNDescriptor. The input is copied to the device if needed; the
// result is a new device-resident tensor with the input's four dimensions,
// created in NCHW format.
void* tensorLRN(void* input_ptr, unsigned int LRN_window,
                double LRN_alpha, double LRN_beta, double LRN_k){

  INFO("*** TensorLRN \n");
  profileEvent("tensorLRN");

  Tensor* input = (Tensor*) input_ptr;

  hostToDeviceCopy(input);

  float alpha = 1.0f, beta = 0.0f;
  cudnnLRNDescriptor_t LRNDesc;
  checkCUDNN(cudnnCreateLRNDescriptor(&LRNDesc));

  INFO("window = %u, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n",
       LRN_window, LRN_alpha, LRN_beta, LRN_k);

  checkCUDNN(cudnnSetLRNDescriptor(LRNDesc, LRN_window, LRN_alpha, LRN_beta, LRN_k));

  size_t* dim_sizes = input->dims.dim_sizes;
  Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
                                            CUDNN_TENSOR_NCHW, dim_sizes[0], dim_sizes[1],
                                            dim_sizes[2], dim_sizes[3]);
  // NOTE: Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);
  // NOTE: Necessary to insert the above call for every output tensor

  printTensorDescInfo(input);
  printTensorDescInfo(output);

  checkCUDNN(cudnnLRNCrossChannelForward(cudnnHandle, LRNDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
                                         &alpha, input->tensor_desc, input->gpu_data,
                                         &beta, output->tensor_desc, output->gpu_data));

  profileEvent("tensorLRN_end", true);

  // The descriptor is only needed for this call; release it so repeated calls don't leak
  checkCUDNN(cudnnDestroyLRNDescriptor(LRNDesc));

  return output;
}




// FIXIT: Apparently this is not working for 3D tensors or dimensions other than 4D
// Perhaps 3D, 2D tensors can be remapped to 4D tensors to make this work?
// Adds `bias_ptr` to `x_ptr` in place with cudnnAddTensor and returns `x_ptr`.
// Both tensors are copied to the device if needed.
void* tensorAdd(void* x_ptr, void* bias_ptr){

  Tensor* x = (Tensor*) x_ptr;
  Tensor* bias = (Tensor*) bias_ptr;

  INFO("*** TensorAdd \n");
  profileEvent("tensorAdd");

  // Used for both scaling factors of cudnnAddTensor: x = 1 * bias + 1 * x
  float alpha = 1.0f;
  hostToDeviceCopy(x);
  hostToDeviceCopy(bias);

  // FIXIT: routine fails for 3D tensors
  checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
                            bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));

  profileEvent("tensorAdd_end", true);


#ifdef ERROR_INJECTION_ENABLED
  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(x, op_acc);
  add_norms(error_norms, "tensorAdd", op_acc);
  add_bias_overheads(x, op_acc);
  op_counter++;
#endif


  return x;
}


// FIXIT: Generalize all of the routines for types {half, float, double}
//
// Runs a 2D cuDNN forward convolution of `input_ptr` with `filter_ptr`.
//
//   conv_mode          0 = CUDNN_CONVOLUTION, 1 = CUDNN_CROSS_CORRELATION
//   compute_precision  1 = CUDNN_DATA_HALF, anything else = CUDNN_DATA_FLOAT
//
// Returns a new device-resident output tensor whose dimensions come from
// cudnnGetConvolution2dForwardOutputDim and whose format follows the input.
void* tensorConvolution(void* input_ptr, void* filter_ptr,
                        int vertical_pad, int horizontal_pad,
                        int vertical_stride, int horizontal_stride,
                        int conv_mode, int compute_precision){

  INFO("*** TensorConvolution \n");
  profileEvent("tensorConv");

  Tensor* input = (Tensor*) input_ptr;
  Tensor* filter = (Tensor*) filter_ptr;

  cudnnConvolutionDescriptor_t convDesc;
  cudnnConvolutionFwdAlgo_t convAlgo;

  // Initialized so that an unexpected conv_mode never feeds an indeterminate value to cuDNN
  cudnnConvolutionMode_t mode = CUDNN_CONVOLUTION;
  if(conv_mode == 0)
    mode = CUDNN_CONVOLUTION;
  else if(conv_mode == 1)
    mode = CUDNN_CROSS_CORRELATION;
  else
    ERROR("Unsupported conv_mode = %d (expected 0 or 1)", conv_mode);

  // FIXIT: Need to be more aware of the implications of alpha and beta
  float alpha = 1.0f, beta = 0.0f;

  // TODO: Support other cases;
  cudnnDataType_t computeType = CUDNN_DATA_FLOAT;
  if(compute_precision == 1){
    computeType = CUDNN_DATA_HALF;
  }

  hostToDeviceCopy(input);
  hostToDeviceCopy(filter);

  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
  // FIXIT: Think if upscaling values need to be configurable?
  // IMP-FIXIT: CUDNN Cross correlation is only used in the Lenet context
  // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used?
  checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc,
                                             vertical_pad, horizontal_pad,       // conv padding
                                             vertical_stride, horizontal_stride, // conv strides
                                             1, 1,                               // upscaling values
                                             mode,                               // mode is configurable
                                             computeType));                      // defines compute precision

  int n, c, h, w; // output dimensions
  // Find dimension of convolution output
  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc,
                                                   input->tensor_desc,
                                                   filter->filter_desc,
                                                   &n, &c, &h, &w));

  DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);

  Tensor* output = NULL;
  if(input->data_format == CUDNN_TENSOR_NCHW)
    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
                                      CUDNN_TENSOR_NCHW, n, c, h, w);
  else if(input->data_format == CUDNN_TENSOR_NHWC){
    DEBUG("* NHWC Format \n");
    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
                                      CUDNN_TENSOR_NHWC, n, h, w, c);
  }
  else
    ERROR("Unsupported Tensor Type");

  // NOTE: Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);
  // NOTE: Necessary to insert the above call for every output tensor

  DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %zu, H = %zu, W = %zu, C = %zu \n",
        (int) output->data_type, (int) output->data_format,
        output->dims.dim_sizes[0], output->dims.dim_sizes[1],
        output->dims.dim_sizes[2], output->dims.dim_sizes[3]);

  if(convDesc == NULL || input->tensor_desc == NULL ||
     filter->filter_desc == NULL || output->tensor_desc == NULL)
    ERROR("NULL descriptor! \n");

  // Debugging info prints
  printTensorDescInfo(input);
  printTensorDescInfo(filter);
  printTensorDescInfo(output);

  // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking
  checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle,
                                                 input->tensor_desc,
                                                 filter->filter_desc,
                                                 convDesc,
                                                 output->tensor_desc,
                                                 CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
                                                 //CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
                                                 0,
                                                 &convAlgo));

  DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", convAlgo,
        CUDNN_CONVOLUTION_FWD_ALGO_FFT, CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
        CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD);

  // FIXIT: Algo shouldn't be hardcoded
  // The algorithm queried above is only logged; this one is what actually runs.
  convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

  size_t workspace_size;
  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle,
                                                     input->tensor_desc,
                                                     filter->filter_desc,
                                                     convDesc,
                                                     output->tensor_desc,
                                                     convAlgo,
                                                     &workspace_size));

  // Allocating memory for the convolution workspace
  void* workspace = NULL;
  checkCudaErrors(cudaMalloc(&workspace, workspace_size));
  DEBUG("workspace size = %zu \n", workspace_size);

  checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc,
                                     input->gpu_data, filter->filter_desc, filter->gpu_data,
                                     convDesc, convAlgo, workspace, workspace_size,
                                     &beta, output->tensor_desc, output->gpu_data));

  profileEvent("tensorConv_end", true);

  // Per-call resources: released after the profiled region so the timing is unchanged.
  // Without this every convolution leaked its workspace and descriptor.
  checkCudaErrors(cudaFree(workspace));
  checkCUDNN(cudnnDestroyConvolutionDescriptor(convDesc));


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(output, op_acc);
  add_norms(error_norms, "tensorConv", op_acc);
  add_conv_overheads(input, filter, vertical_stride, horizontal_stride, op_acc);

  op_counter++;

#endif


  return output;
}






// FIXIT: Currently this only computes MAX pooling
// FIXIT: Add support for Average Pooling
//
// Runs cuDNN 2D max pooling over `input_ptr` and returns a new device-resident
// float NCHW tensor. Output height/width are
//   (in + 2 * pad - window) / stride + 1   (integer division).
// NOTE(review): `poolFunction` is accepted but not used - pooling is always
// CUDNN_POOLING_MAX; the intended value-to-mode mapping is not visible here.
void* tensorPooling(void* input_ptr,
                    int poolFunction,
                    int window_height, int window_width,
                    int vertical_pad, int horizontal_pad,
                    int vertical_stride, int horizontal_stride){

  INFO("*** TensorPooling \n");
  profileEvent("tensorPooling");

  Tensor* input = (Tensor*) input_ptr;

  cudnnPoolingDescriptor_t poolDesc;
  // FIXIT: Need to be more aware of the implications of alpha and beta
  float alpha = 1.0f, beta = 0.0f;

  hostToDeviceCopy(input);

  checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc));

  int n = input->dims.dim_sizes[0];
  int c = input->dims.dim_sizes[1];
  int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride;
  h = h + 1;
  int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride;
  w = w + 1;

  DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);

  // FIXIT: Don't be specific to floats
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w);
  // Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);

  // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format
  // FIXIT: Is this setTensor even needed?
  checkCUDNN(cudnnSetTensor4dDescriptor(output->tensor_desc,
                                        CUDNN_TENSOR_NCHW,
                                        CUDNN_DATA_FLOAT,
                                        n, c,
                                        h, w));

  // FIXIT: Make the pool function (max, min, avg) configurable
  checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc,
                                         CUDNN_POOLING_MAX,
                                         CUDNN_PROPAGATE_NAN,
                                         window_height, window_width,
                                         vertical_pad, horizontal_pad,
                                         vertical_stride, horizontal_stride));

  checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input->tensor_desc,
                                 input->gpu_data, &beta, output->tensor_desc, output->gpu_data));

  profileEvent("tensorPooling_end", true);

  // The descriptor is only needed for this call; release it so repeated calls don't leak
  checkCUDNN(cudnnDestroyPoolingDescriptor(poolDesc));


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(output, op_acc);
  add_norms(error_norms, "tensorPooling", op_acc);
  add_pool_overheads(input, window_height, vertical_stride, op_acc);

  op_counter++;

#endif


  return output;
}




// Reference matrix multiply on the CPU: output (m x n) = lhs (m x k) * rhs (k x n).
//
// m is lhs dimension 0, k is the product of the remaining lhs dimensions and
// n is the last rhs dimension; all buffers are indexed row-major.
// Both operands are copied to the host; only float tensors are supported.
// Returns a new host-resident m x n x 1 x 1 tensor.
void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemmCPU \n");

  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;

  // The operation is done on the CPU
  deviceToHostCopy(lhs);
  deviceToHostCopy(rhs);

  if(lhs->data_type != CUDNN_DATA_FLOAT){
    ERROR("Currently only Floating point is supported ");
  }

  profileEvent("tensorGemmCPU");

  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);

  // The last two rhs dimensions are read below
  if(rhs->dims.num_dims < 2){
    ERROR("rhs must have at least 2 dimensions");
  }

  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = lhs->dims.dim_sizes[0];
  // The rhs must be a 2D tensor
  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
  int k = 1;
  // Flattening the dimensions after the batch dimension
  // NOTE: Allowing any number of dimensions > 2 for lhs
  for (int j = 1 ; j < lhs->dims.num_dims; j++){
    k = k * lhs->dims.dim_sizes[j]; // input neurons
  }

  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
  // Dimension-note: Check if k is same across the two tensors
  INFO("m = %d, n = %d, k = %d \n", m, n, k);
  if(rhs_k != k){
    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
  }

  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
  // The result is produced on the host, so the output stays host-resident
  changeTensorPlacement(output, HOST);

  float* lhs_arr = (float*) lhs->host_data;
  float* rhs_arr = (float*) rhs->host_data;
  float* output_arr = (float*) output->host_data;

  for(int row = 0; row < m; row++){
    for(int col = 0; col < n; col++){
      float sum = 0.0;
      for(int l = 0; l < k; l++){
        sum = sum + lhs_arr[row*k+l] * rhs_arr[l*n+col];
      }
      output_arr[row*n+col] = sum;
    }
  }

  profileEvent("tensorGemmCPU_end", true);

  return output;
}



// Reference: https://gist.github.com/peterwittek/6303527
//
// Matrix multiply on the GPU with cuBLAS: output (m x n) = lhs (m x k) * rhs (k x n).
//
// m is lhs dimension 0, k is the product of the remaining lhs dimensions and
// n is the last rhs dimension. Operands are copied to the device if needed.
// Returns a new device-resident float m x n x 1 x 1 tensor.
void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemmGPU \n");
  profileEvent("tensorGemmGPU");

  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;

  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);

  // The last two rhs dimensions are read below
  if(rhs->dims.num_dims < 2){
    ERROR("rhs must have at least 2 dimensions");
  }

  // FIXIT: Need to be more aware of the implications of alpha and beta
  float alpha = 1.0f, beta = 0.0f;
  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = lhs->dims.dim_sizes[0];
  // The rhs last dimension must contain the neurons
  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
  int k = 1;

  // Flattening the dimensions after the batch dimension
  // NOTE: Allowing any number of dimensions > 2 for lhs
  for (int j = 1 ; j < lhs->dims.num_dims; j++){
    k = k * lhs->dims.dim_sizes[j]; // input neurons
  }

  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
  // Dimension-note: Check if k is same across the two tensors
  INFO("m = %d, n = %d, k = %d \n", m, n, k);
  if(rhs_k != k){
    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
  }

  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);

  // Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);

  hostToDeviceCopy(lhs);
  hostToDeviceCopy(rhs);

  // INFO: cuBlas uses column-major format
  // INFO: The leading dimension is just the FIRST Dimension
  // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects
  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
                              n, m, k,
                              &alpha,
                              (float*) rhs->gpu_data, n,
                              (float*) lhs->gpu_data, k,
                              &beta,
                              (float*) output->gpu_data, n));

  profileEvent("tensorGemmGPU_end", true);


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(output, op_acc);
  add_norms(error_norms, "tensorGemm", op_acc);
  add_gemm_overheads(lhs_ptr, rhs_ptr, op_acc);

  op_counter++;

#endif


  return output;
}







// GPU matrix multiply variant that passes rhs to cuBLAS with CUBLAS_OP_T and a
// leading dimension of k (tensorGemmGPU uses CUBLAS_OP_N with leading dimension n).
//
// m, n and k are derived exactly as in tensorGemmGPU.
// NOTE(review): with these arguments the rhs buffer is read as n rows of k
// contiguous values - confirm callers store rhs in that layout.
// Returns a new device-resident float m x n x 1 x 1 tensor.
void* tensorGemm(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemm \n");
  profileEvent("tensorGemm");

  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;

  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);

  // The last two rhs dimensions are read below
  if(rhs->dims.num_dims < 2){
    ERROR("rhs must have at least 2 dimensions");
  }

  // FIXIT: Need to be more aware of the implications of alpha and beta
  float alpha = 1.0f, beta = 0.0f;
  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = lhs->dims.dim_sizes[0];
  // The rhs last dimension must contain the neurons
  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
  int k = 1;
  // Flattening the dimensions after the batch dimension
  // NOTE: Allowing any number of dimensions > 2 for lhs
  for (int j = 1 ; j < lhs->dims.num_dims; j++){
    k = k * lhs->dims.dim_sizes[j]; // input neurons
  }

  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
  // Dimension-note: Check if k is same across the two tensors
  INFO("m = %d, n = %d, k = %d \n", m, n, k);
  if(rhs_k != k){
    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
  }

  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
  // Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);

  hostToDeviceCopy(lhs);
  hostToDeviceCopy(rhs);

  // INFO: cuBlas uses column-major format
  // INFO: The leading dimension is just the FIRST Dimension
  // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects
  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N,
                              n, m, k,
                              &alpha,
                              (float*) rhs->gpu_data, k,
                              (float*) lhs->gpu_data, k,
                              &beta,
                              (float*) output->gpu_data, n));

  profileEvent("tensorGemm_end", true);

  return output;
}




// FIXIT: Add dimension check assertions throughout the code
//
// Adds the bias vector to every row of `input_ptr` in place and returns `input_ptr`.
//
// Implemented as a rank-1 cuBLAS update: input += bias (n x 1) * ones (1 x m),
// where m is input dimension 0 and n is the last bias dimension.
// NOTE(review): the temporary ones tensor is not released here - confirm
// whether create2DTensor registers it for later cleanup.
void* tensorGemmBias(void* input_ptr, void* bias_ptr){

  INFO("*** TensorGemmBias \n");
  profileEvent("tensorGemmBias");

  Tensor* input = (Tensor*) input_ptr;
  Tensor* bias = (Tensor*) bias_ptr;

  // IMP: beta is set to 1 to append to input
  // C = A * B + Beta * C
  float alpha = 1.0f, beta = 1.0f;
  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = input->dims.dim_sizes[0];
  // The bias must be a 2D tensor
  int n = bias->dims.dim_sizes[bias->dims.num_dims - 1]; // output neurons

  INFO("m = %d, n = %d \n", m, n);

  hostToDeviceCopy(input);
  hostToDeviceCopy(bias);

  struct Tensor* onevec = (Tensor*) create2DTensor(CUDNN_DATA_FLOAT, m, 1);
  fillOnes(onevec);
  hostToDeviceCopy(onevec);

  // INFO: cuBlas uses column-major format
  // INFO: The leading dimension is just the FIRST Dimension
  // CONFUSION: Not sure when to transpose and when not to
  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
                              n, m, 1,
                              &alpha,
                              (float*) bias->gpu_data, n,
                              (float*) onevec->gpu_data, 1,
                              &beta,
                              (float*) input->gpu_data, n));

  profileEvent("tensorGemmBias_end", true);

  return input;
}


// Applies cuDNN ReLU to `input_ptr` in place (same descriptor and buffer for
// source and destination) and returns `input_ptr`.
void* tensorRelu(void* input_ptr){

  INFO("*** TensorRelu \n");
  profileEvent("tensorRelu");

  Tensor* input = (Tensor*) input_ptr;

  cudnnActivationDescriptor_t reluDesc;
  float alpha = 1.0f, beta = 0.0f;
  hostToDeviceCopy(input);

  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));

  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_RELU,
                                          CUDNN_PROPAGATE_NAN, 0.0));

  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
                                    input->tensor_desc, input->gpu_data, &beta,
                                    input->tensor_desc, input->gpu_data));

  profileEvent("tensorRelu_end", true);

  // The descriptor is only needed for this call; release it so repeated calls don't leak
  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(input, op_acc);
  add_norms(error_norms, "tensorRelu", op_acc);
  add_relu_overheads(input, op_acc);
  op_counter++;
#endif


  return input;
}


// Think: Should Softmax be broken into individual IR operations?
// Applies cuDNN softmax (accurate variant, channel mode) to `input_ptr` in
// place, copies the result back to the host and returns `input_ptr`.
void* tensorSoftmax(void* input_ptr){

  INFO("*** TensorSoftmax \n");
  profileEvent("tensorSoftmax");

  Tensor* input = (Tensor*) input_ptr;

  float alpha = 1.0f, beta = 0.0f;
  hostToDeviceCopy(input);

  // IMP: CUDNN_SOFTMAX_ACCURATE can be replaced with a less accurate but faster version - CUDNN_SOFTMAX_FAST
  // However, not sure if the Softmax overhead is even a concern
  // TODO: Figure out if mode should be MODE_CHANNEL or MODE_INSTANCE?
  checkCUDNN(cudnnSoftmaxForward(cudnnHandle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
                                 &alpha, input->tensor_desc, input->gpu_data, &beta,
                                 input->tensor_desc, input->gpu_data));

  deviceToHostCopy(input);
  profileEvent("tensorSoftmax_end", true);

  return input;
}



// Kernel: clamps each of the first `n` elements of A into [min, max], one
// element per thread. Threads whose global index is >= n do nothing.
__global__ void clipValues(float* A, float min, float max, int n){

  int id = blockIdx.x * blockDim.x + threadIdx.x;

  if(id < n){
    A[id] = fmaxf(min, A[id]);
    A[id] = fminf(max, A[id]);
  }
}



// Clipped ReLU: clamps `input_ptr` in place into [min, max] and returns `input_ptr`.
//
// When min == 0 this uses cuDNN's CUDNN_ACTIVATION_CLIPPED_RELU with `max` as
// the clipping threshold. For any other lower bound, cuDNN cannot express the
// clamp, so the clipValues kernel is used instead (float tensors only).
// Previously both bounds were ignored and the ceiling was hard-coded to 2.0.
void* tensorRelu2(void* input_ptr, float min, float max){

  INFO("*** TensorClippedRelu *** \n");
  profileEvent("tensorClippedRelu");

  Tensor* input = (Tensor*) input_ptr;
  hostToDeviceCopy(input);

  if(min == 0.0f){
    cudnnActivationDescriptor_t reluDesc;
    float alpha = 1.0f, beta = 0.0f;

    checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));

    // The last argument is the clipping threshold (upper bound) of the clipped ReLU
    checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU,
                                            CUDNN_PROPAGATE_NAN, max));

    checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
                                      input->tensor_desc, input->gpu_data, &beta,
                                      input->tensor_desc, input->gpu_data));

    // The descriptor is only needed for this call; release it so repeated calls don't leak
    checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
  }
  else{
    if(input->data_type != CUDNN_DATA_FLOAT){
      ERROR("Currently only Floating point is supported ");
    }

    // A zero-sized grid is an invalid launch configuration, so skip empty tensors
    if(input->num_elems > 0){
      int blockSize = 128;
      int gridSize = (int) ((input->num_elems + blockSize - 1) / blockSize);
      INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);

      // NOTE: Check if a large gridSize will work with really large tensors
      clipValues<<<gridSize, blockSize>>>((float*) input->gpu_data, min, max,
                                          (int) input->num_elems);
      checkCudaErrors(cudaGetLastError());
    }
  }

  profileEvent("tensorClippedRelu_end", true);


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(input, op_acc);
  add_norms(error_norms, "tensorClippedRelu", op_acc);
  add_relu_overheads(input, op_acc);
  op_counter++;
#endif


  return input;
}


// Applies cuDNN tanh to `input_ptr` in place (same descriptor and buffer for
// source and destination) and returns `input_ptr`.
void* tensorTanh(void* input_ptr){

  INFO("*** TensorTanh \n");
  profileEvent("tensorTanh");

  Tensor* input = (Tensor*) input_ptr;

  cudnnActivationDescriptor_t tanhDesc;
  float alpha = 1.0f, beta = 0.0f;
  hostToDeviceCopy(input);

  checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc));

  checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH,
                                          CUDNN_PROPAGATE_NAN, 0.0));

  checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha,
                                    input->tensor_desc, input->gpu_data, &beta,
                                    input->tensor_desc, input->gpu_data));

  profileEvent("tensorTanh_end", true);

  // The descriptor is only needed for this call; release it so repeated calls don't leak
  checkCUDNN(cudnnDestroyActivationDescriptor(tanhDesc));


#ifdef ERROR_INJECTION_ENABLED

  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];
  void* error_norms = tensorAddError(input, op_acc);
  add_norms(error_norms, "tensorTanh", op_acc);
  // NOTE(review): tanh is accounted with the ReLU overhead model - confirm this is intended
  add_relu_overheads(input, op_acc);
  op_counter++;
#endif


  return input;
}

