diff --git a/llvm/projects/hpvm-tensor-rt/.gitignore b/llvm/projects/hpvm-tensor-rt/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..9581f1d5b06aaafaaae77ef6175bc243707e4685
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/.gitignore
@@ -0,0 +1,2 @@
+# CMake out-of-source build tree (generated files; must not be committed)
+/build/
diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..28ca6bb639731444c030018c4abc726e8d3b597f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -0,0 +1,86 @@
+# Build rules for the HPVM tensor runtime library (CUDA/cuDNN/cuBLAS) and the
+# DNN test/benchmark executables that link against it.
+#
+# 3.5 is the oldest policy version that current CMake releases still accept,
+# and it keeps the legacy FindCUDA module used below available.
+cmake_minimum_required(VERSION 3.5)
+project(cudnn-training)
+
+find_package(CUDA 6.5 REQUIRED)
+
+# Opt-in gflags support: configure with -DUSE_GFLAGS=ON.
+option(USE_GFLAGS "Compile with -DUSE_GFLAGS and link gflags" OFF)
+
+# nvcc flags common to every build type: sm_60 machine code plus compute_60 PTX.
+list(APPEND CUDA_NVCC_FLAGS
+  -gencode arch=compute_60,code=sm_60
+  -gencode arch=compute_60,code=compute_60
+  -std=c++11)
+
+# Anything other than an explicit Debug build (including an empty
+# CMAKE_BUILD_TYPE) is compiled optimized with assertions disabled.
+if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+  message("Debug mode")
+  list(APPEND CUDA_NVCC_FLAGS -g -lineinfo -Xcompiler -ggdb)
+else()
+  list(APPEND CUDA_NVCC_FLAGS -O3 -DNDEBUG -Xcompiler -DNDEBUG)
+endif()
+
+# Host-compiler flags are forwarded explicitly with -Xcompiler above.
+set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+
+# Ensure non-CUDA (.cc) sources are also compiled as C++11, including with older g++ versions.
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+add_definitions(-DNO_INJECTION)
+if(USE_GFLAGS)
+  add_definitions(-DUSE_GFLAGS)
+endif()
+
+# cuDNN is located through the CUDNN_PATH environment variable, which is read
+# once, at configure time.
+if("$ENV{CUDNN_PATH}" STREQUAL "")
+  message(WARNING "CUDNN_PATH is not set; cuDNN headers/libraries may not be found")
+endif()
+include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include)
+link_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64)
+
+# Tensor runtime library, compiled by nvcc and linked with cuBLAS, cuDNN and cuRAND.
+cuda_add_library(tensor_runtime tensor_runtime/src/tensor_runtime.cu)
+cuda_add_cublas_to_target(tensor_runtime)
+
+# Keyword-less signature on purpose: it must not be mixed with the keyword form
+# on one target, and the FindCUDA helpers above may already use the plain one.
+if(USE_GFLAGS)
+  target_link_libraries(tensor_runtime gflags)
+endif()
+# CUDA_curand_LIBRARY is the full library path located by find_package(CUDA).
+target_link_libraries(tensor_runtime cudnn ${CUDA_curand_LIBRARY})
+
+# Adds one executable built from a single source file and links it against the
+# tensor runtime. Usage: hpvm_add_dnn_binary(<target> <source>)
+function(hpvm_add_dnn_binary target source)
+  add_executable(${target} ${source})
+  target_link_libraries(${target} tensor_runtime)
+endfunction()
+
+# Debugging source
+hpvm_add_dnn_binary(test_ops dnn_sources/src/test_ops.cc)
+
+# Full-precision networks
+hpvm_add_dnn_binary(fc2_clipped dnn_sources/src/fc2_clipped.cc)
+hpvm_add_dnn_binary(fc3_clipped dnn_sources/src/fc3_clipped.cc)
+hpvm_add_dnn_binary(fc4_clipped dnn_sources/src/fc4_clipped.cc)
+# Note: the lenet_tanh target is built from lenet2_tanh.cc.
+hpvm_add_dnn_binary(lenet_tanh dnn_sources/src/lenet2_tanh.cc)
+hpvm_add_dnn_binary(lenet_keras dnn_sources/src/lenet_keras.cc)
+hpvm_add_dnn_binary(cifar_keras dnn_sources/src/cifar_keras.cc)
+
+# Half-precision networks
+hpvm_add_dnn_binary(fc2_half dnn_sources/src/half/fc2_half.cc)
+hpvm_add_dnn_binary(fc3_half dnn_sources/src/half/fc3_half.cc)
+hpvm_add_dnn_binary(fc4_half dnn_sources/src/half/fc4_half.cc)
+hpvm_add_dnn_binary(lenet_tanh_half dnn_sources/src/half/lenet_tanh_half.cc)
+hpvm_add_dnn_binary(lenet_keras_half dnn_sources/src/half/lenet_keras_half.cc)
diff --git a/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py b/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..74aa23c71aa3e81fc9422a3cc73ba3b69ed98c8a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/measure_confidence.py
@@ -0,0 +1,125 @@
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+def getAccuracy(file_name):
+  """Return the float stored in file_name (also echoed); exit(1) if it is missing."""
+  if not os.path.exists(file_name):
+    print("final_accuracy file not found ")
+    sys.exit(1)
+
+  # The context manager closes the handle even when float() raises.
+  with open(file_name, "r") as acc_file:
+    accuracy = float(acc_file.read())
+  print(accuracy)
+  return accuracy
+
+
+total_runs = 12.0  # times each configuration is executed; kept as a float because it is also a divisor
+skip_lines = 0  # number of leading lines of each configuration file that are not parsed for knobs
+
+
+def test_func():
+  """Debug helper: print a marker and terminate the process with status 0."""
+  print("test_func")
+  sys.exit(0)
+
+def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold):
+  """Run binary_name total_runs times and summarise the accuracies it reports.
+
+  After each run the accuracy is read from the "final_accuracy" file. Returns
+  (confidence, avg_acc): the percentage of runs whose accuracy was strictly above
+  accuracy_threshold, and the mean accuracy. confidence_threshold is not used here.
+  """
+  run_count = int(total_runs)
+  passed = 0.0
+  acc_sum = 0
+  for _ in range(run_count):
+    subprocess.call(binary_name)
+    run_acc = getAccuracy("final_accuracy")
+    acc_sum += run_acc
+    passed += 1 if run_acc > accuracy_threshold else 0
+  confidence = 100.0 * (passed / total_runs)
+  print("confidence = ", confidence)
+  avg_acc = acc_sum / total_runs
+  print("average accuracy = ", avg_acc)
+  return confidence, avg_acc
+  
+
+def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir):
+  """Re-run binary_name for every configuration file in result_dir.
+
+  For each regular file, the second whitespace-separated field of every line
+  (from line skip_lines on) is written as an integer knob to "opentuner_flags"
+  and do_multiple_runs() is called. Files whose run confidence exceeds 90 are
+  copied to output_dir with a summary header. Exits with status 1 when
+  result_dir does not exist.
+  Returns a list of (run_confidence, avg_accuracy, file_name) tuples.
+  """
+  if not os.path.exists(result_dir):
+    print("Path does not exist")
+    sys.exit(1)
+
+  file_names = os.listdir(result_dir)
+  print(file_names)
+  confidence_list = []
+  for file_name in file_names:
+    config_path = os.path.join(result_dir, file_name)
+    if os.path.isdir(config_path):  # skip sub-directories
+      continue
+
+    results_str = ""
+    # Context managers close both files even if a line fails to parse.
+    with open(config_path) as f, open("opentuner_flags", "w+") as tuner_file:
+      for index, x in enumerate(f):
+        if index >= skip_lines:
+          error_knob = int(float(x.split()[1]))
+          print(error_knob)
+          tuner_file.write(str(error_knob) + "\n")
+        results_str += x
+
+    # Pass the parameter, not the `binary` global that exists only under __main__.
+    run_confidence, avg_accuracy = do_multiple_runs(binary_name, accuracy, confidence)
+
+    # NOTE(review): the cut-off is hard-coded to 90; `confidence` is only forwarded.
+    if run_confidence > 90:
+      with open(os.path.join(output_dir, file_name), "w+") as f2:
+        f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n")
+        f2.write(results_str)
+
+    confidence_list.append((run_confidence, avg_accuracy, file_name))
+
+  return confidence_list
+    
+
+if __name__ == "__main__":
+  # Command-line entry point: re-run every configuration found in --result-dir and
+  # write one summary line per configuration, highest confidence first.
+  argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy')
+  argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations')
+  argparser.add_argument('--output-dir', help='Directory for storing output directory')
+  argparser.add_argument('--binary', help='Binary name to run')
+  argparser.add_argument('--accuracy', type=float,  help='Accuracy constraint')
+  argparser.add_argument('--confidence', type=float, help='Confidence threshold')
+  args = argparser.parse_args()
+
+  # Kept as module-level names so code that reads them as globals keeps working.
+  result_dir, output_dir, binary = args.result_dir, args.output_dir, args.binary
+  accuracy, confidence = args.accuracy, args.confidence
+
+  confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir)
+
+  # Rank by run confidence (tuple field 0), best first.
+  sorted_list = sorted(confidence_list, key=lambda entry: entry[0], reverse=True)
+
+  summary_path = output_dir + "/confidence_summary.txt"
+  output_file = open(summary_path, "w+")
+  for x in sorted_list:
+    # One tab-separated record: confidence, average accuracy, file name.
+    output_file.write("\t".join(str(field) for field in x) + "\n")
+
+  output_file.close()
+  
diff --git a/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py b/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py
new file mode 100644
index 0000000000000000000000000000000000000000..898b4c4f42211e010b1544039cbd4b4125c03b92
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/select_top_results.py
@@ -0,0 +1,89 @@
+
+
+import argparse
+import sys
+import os
+
+
+log_index = 7  # zero-based field of each whitespace-split result line that is summed as the "log" score
+linear_index = 8  # zero-based field summed as the "linear" score
+quad_index = 9  # zero-based field summed as the "quad" score
+
+top_k = 10  # number of best-ranked files written out for each of the three rankings
+
+def dump_results(sorted_list, k, result_dir, sub_dir):
+  """Write the first k entries of sorted_list into result_dir/sub_dir.
+
+  Each entry is (file_str, file_name, log_result, linear_result, quad_result).
+  Entry i goes to "<file_name>_rank_<i>": the three results tab-separated on
+  the first line, then file_str. If fewer than k entries exist, all are written.
+  """
+  ref_dir = result_dir + "/" + sub_dir
+  if not os.path.exists(ref_dir):
+    os.mkdir(ref_dir)
+
+  # Slicing clamps to the list length, so a short list cannot raise IndexError.
+  for i, entry in enumerate(sorted_list[:max(k, 0)]):
+    file_str, name, log_res, linear_res, quad_res = entry
+    with open(ref_dir + "/" + name + "_rank_" + str(i), "w+") as f:
+      f.write(str(log_res) + "\t" + str(linear_res) + "\t" + str(quad_res) + "\n")
+      f.write(file_str)
+
+
+def select_top_results(result_dir):
+  """Rank the result files in result_dir and dump the top_k of each ranking.
+
+  For every regular file, the fields at log_index, linear_index and quad_index
+  of each whitespace-split line are summed. Files are then sorted ascending by
+  each sum and the first top_k are written by dump_results() into the "log",
+  "linear" and "quad" sub-directories of result_dir.
+
+  Exits with status 1 when result_dir does not exist.
+  """
+  if not os.path.exists(result_dir):
+    print("Path does not exist")
+    sys.exit(1)
+
+  file_names = os.listdir(result_dir)
+  print(file_names)
+
+  results_arr = []
+  for file_name in file_names:
+    path = os.path.join(result_dir, file_name)
+    if os.path.isdir(path):  # skip sub-directories
+      continue
+
+    log_result = linear_result = quad_result = 0.0
+    file_str = ""
+    with open(path) as f:  # context manager closes each file promptly
+      for x in f:
+        file_str += x
+        words = x.split()
+        if not words:  # blank line: keep it in file_str, nothing to add up
+          continue
+        log_result += float(words[log_index])
+        linear_result += float(words[linear_index])
+        quad_result += float(words[quad_index])
+
+    results_arr.append((file_str, file_name, log_result, linear_result, quad_result))
+
+  # Tuple positions 2, 3 and 4 hold the log, linear and quad sums respectively;
+  # each ranking is ascending, so the smallest sums are dumped first.
+  for position, sub_dir in ((2, "log"), (3, "linear"), (4, "quad")):
+    ranked = sorted(results_arr, key=lambda tup: tup[position])
+    dump_results(ranked, top_k, result_dir, sub_dir)
+
+
+
+if __name__ == "__main__":
+  # Entry point: rank the result files found under --result-dir.
+  argparser = argparse.ArgumentParser(
+      description='runs best configs to get high confidence on accuracy')
+  argparser.add_argument(
+      '--result-dir', help='Directory containing OpenTuner configurations')
+  args = argparser.parse_args()
+  result_dir = args.result_dir
+  select_top_results(result_dir)
+  
+
+    
diff --git a/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh b/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh
new file mode 100644
index 0000000000000000000000000000000000000000..58f16f20d0af12f041840b8037ae13e49c214ed4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/setupEnv.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Environment for building against CUDA 8.0 / cuDNN. Meant to be sourced so
+# that the exports reach the calling shell.
+module load cuda-toolkit/8.0
+
+# CMakeLists.txt reads CUDNN_PATH from the environment at configure time.
+export CUDNN_PATH=/software/cuda-toolkit-8.0/lib64/
+# ${VAR:+...} appends without a leading ':' when LIBRARY_PATH is empty; an
+# empty entry would be treated as the current directory.
+export LIBRARY_PATH="${LIBRARY_PATH:+$LIBRARY_PATH:}/software/cuda-toolkit-8.0/lib64/"
diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh
new file mode 100644
index 0000000000000000000000000000000000000000..446481b79a47827bf47341ce9d14f15f57d26866
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/setup_paths.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# Developer environment for HPVM builds; meant to be sourced from the build shell.
+# NOTE: every path below is specific to one developer machine.
+
+# Anaconda include files. ${VAR:+...} appends without a leading ':' when CPATH
+# is empty, since an empty CPATH entry makes the compiler search the current directory.
+export CPATH="${CPATH:+$CPATH:}/home/hsharif3/anaconda2/include/"
+# llvm/clang-4.0 build, placed first so its tools take precedence.
+export PATH="/home/hsharif3/Gitlab/llvm/llvm/build/bin/:$PATH"
+
+export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/
+export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/
diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh
new file mode 100644
index 0000000000000000000000000000000000000000..47b446456e70d1cdcfc6f799ee51caca67fe857c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/setup_tyler_paths.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# CUDA 9.1 / cuDNN environment plus HPVM paths. Meant to be sourced; every path
+# below is machine-specific.
+
+# CUDNN Path setup
+module load cuda-toolkit/9.1
+export CUDNN_PATH=/software/cuda-9.1/lib64/
+# Search the CUDNN_PATH directory as well as the cuda-toolkit-9.1 one at link
+# time. ${VAR:+...} avoids an empty leading entry when LIBRARY_PATH is unset.
+export LIBRARY_PATH="${LIBRARY_PATH:+$LIBRARY_PATH:}${CUDNN_PATH}:/software/cuda-toolkit-9.1/lib64/"
+
+# HPVM Path setup
+export CPATH="${CPATH:+$CPATH:}/home/hsharif3/anaconda2/include/"
+export PATH="/home/hsharif3/Gitlab/llvm/llvm/build/bin/:$PATH"
+export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/
+export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5132c46fbb4539935b1c4c3bccff1d80679482a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeCache.txt
@@ -0,0 +1,514 @@
+# This is the CMakeCache file.
+# For build in directory: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+# It was generated by CMake: /usr/bin/cmake
+# You can edit this file to change values found and used by cmake.
+# If you do not want to change any of the values, simply exit the editor.
+# If you do want to change a value, simply edit, save, and exit the editor.
+# The syntax for the file is as follows:
+# KEY:TYPE=VALUE
+# KEY is the name of a variable in the cache.
+# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
+# VALUE is the current value for the KEY.
+
+########################
+# EXTERNAL cache entries
+########################
+
+//Path to a program.
+CMAKE_AR:FILEPATH=/usr/bin/ar
+
+//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
+// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.
+CMAKE_BUILD_TYPE:STRING=
+
+//Enable/Disable color output during build.
+CMAKE_COLOR_MAKEFILE:BOOL=ON
+
+//CXX compiler
+CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++
+
+//Flags used by the compiler during all build types.
+CMAKE_CXX_FLAGS:STRING=
+
+//Flags used by the compiler during debug builds.
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the compiler during release builds for minimum
+// size.
+CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the compiler during release builds.
+CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the compiler during release builds with debug info.
+CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+
+//C compiler
+CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc
+
+//Flags used by the compiler during all build types.
+CMAKE_C_FLAGS:STRING=
+
+//Flags used by the compiler during debug builds.
+CMAKE_C_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the compiler during release builds for minimum
+// size.
+CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the compiler during release builds.
+CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the compiler during release builds with debug info.
+CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+
+//Flags used by the linker.
+CMAKE_EXE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Enable/Disable output of compile commands during generation.
+CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=OFF
+
+//Install path prefix, prepended onto install directories.
+CMAKE_INSTALL_PREFIX:PATH=/usr/local
+
+//Path to a program.
+CMAKE_LINKER:FILEPATH=/usr/bin/ld
+
+//Path to a program.
+CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/make
+
+//Flags used by the linker during the creation of modules.
+CMAKE_MODULE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Path to a program.
+CMAKE_NM:FILEPATH=/usr/bin/nm
+
+//Path to a program.
+CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy
+
+//Path to a program.
+CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump
+
+//Value Computed by CMake
+CMAKE_PROJECT_NAME:STATIC=cudnn-training
+
+//Path to a program.
+CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib
+
+//Flags used by the linker during the creation of dll's.
+CMAKE_SHARED_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//If set, runtime paths are not added when installing shared libraries,
+// but are added when building.
+CMAKE_SKIP_INSTALL_RPATH:BOOL=NO
+
+//If set, runtime paths are not added when using shared libraries.
+CMAKE_SKIP_RPATH:BOOL=NO
+
+//Flags used by the linker during the creation of static libraries.
+CMAKE_STATIC_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Path to a program.
+CMAKE_STRIP:FILEPATH=/usr/bin/strip
+
+//If this value is on, makefiles will be generated without the
+// .SILENT directive, and all commands will be echoed to the console
+// during the make.  This is useful for debugging only. With Visual
+// Studio IDE projects all commands are done without /nologo.
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
+
+//Compile device code in 64 bit mode
+CUDA_64_BIT_DEVICE_CODE:BOOL=ON
+
+//Attach the build rule to the CUDA source file.  Enable only when
+// the CUDA source file is added to at most one target.
+CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE:BOOL=ON
+
+//Generate and parse .cubin files in Device mode.
+CUDA_BUILD_CUBIN:BOOL=OFF
+
+//Build in Emulation mode
+CUDA_BUILD_EMULATION:BOOL=OFF
+
+//"cudart" library
+CUDA_CUDART_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcudart.so
+
+//"cuda" library (older versions only).
+CUDA_CUDA_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/libcuda.so
+
+//Directory to put all the output files.  If blank it will default
+// to the CMAKE_CURRENT_BINARY_DIR
+CUDA_GENERATED_OUTPUT_DIR:PATH=
+
+//Generated file extension
+CUDA_HOST_COMPILATION_CPP:BOOL=ON
+
+//Host side compiler used by NVCC
+CUDA_HOST_COMPILER:FILEPATH=/usr/bin/cc
+
+//Path to a program.
+CUDA_NVCC_EXECUTABLE:FILEPATH=/software/cuda-9.1/bin/nvcc
+
+//Semi-colon delimit multiple arguments.
+CUDA_NVCC_FLAGS:STRING=
+
+//Semi-colon delimit multiple arguments.
+CUDA_NVCC_FLAGS_DEBUG:STRING=
+
+//Semi-colon delimit multiple arguments.
+CUDA_NVCC_FLAGS_MINSIZEREL:STRING=
+
+//Semi-colon delimit multiple arguments.
+CUDA_NVCC_FLAGS_RELEASE:STRING=
+
+//Semi-colon delimit multiple arguments.
+CUDA_NVCC_FLAGS_RELWITHDEBINFO:STRING=
+
+//Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile
+CUDA_PROPAGATE_HOST_FLAGS:BOOL=ON
+
+//Path to a file.
+CUDA_SDK_ROOT_DIR:PATH=CUDA_SDK_ROOT_DIR-NOTFOUND
+
+//Compile CUDA objects with separable compilation enabled.  Requires
+// CUDA 5.0+
+CUDA_SEPARABLE_COMPILATION:BOOL=OFF
+
+//Specify the name of the class of CPU architecture for which the
+// input files must be compiled.
+CUDA_TARGET_CPU_ARCH:STRING=
+
+//Path to a file.
+CUDA_TOOLKIT_INCLUDE:PATH=/software/cuda-9.1/include
+
+//Toolkit location.
+CUDA_TOOLKIT_ROOT_DIR:PATH=/software/cuda-9.1
+
+//Toolkit target location.
+CUDA_TOOLKIT_TARGET_DIR:PATH=/software/cuda-9.1
+
+//Use the static version of the CUDA runtime library if available
+CUDA_USE_STATIC_CUDA_RUNTIME:BOOL=ON
+
+//Print out the commands run while compiling the CUDA source file.
+//  With the Makefile generator this defaults to VERBOSE variable
+// specified on the command line, but can be forced on with this
+// option.
+CUDA_VERBOSE_BUILD:BOOL=OFF
+
+//Version of CUDA as computed from nvcc.
+CUDA_VERSION:STRING=9.1
+
+//"cublas" library
+CUDA_cublas_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcublas.so
+
+//static CUDA runtime library
+CUDA_cudart_static_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcudart_static.a
+
+//"cufft" library
+CUDA_cufft_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcufft.so
+
+//"cupti" library
+CUDA_cupti_LIBRARY:FILEPATH=/software/cuda-9.1/extras/CUPTI/lib64/libcupti.so
+
+//"curand" library
+CUDA_curand_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcurand.so
+
+//"cusolver" library
+CUDA_cusolver_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcusolver.so
+
+//"cusparse" library
+CUDA_cusparse_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libcusparse.so
+
+//"nppc" library
+CUDA_nppc_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libnppc.so
+
+//"nppi" library
+CUDA_nppi_LIBRARY:FILEPATH=CUDA_nppi_LIBRARY-NOTFOUND
+
+//"npps" library
+CUDA_npps_LIBRARY:FILEPATH=/software/cuda-9.1/lib64/libnpps.so
+
+//Path to a library.
+CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.so
+
+//Value Computed by CMake
+cudnn-training_BINARY_DIR:STATIC=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+//Value Computed by CMake
+cudnn-training_SOURCE_DIR:STATIC=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+//Dependencies for the target
+tensor_runtime_LIB_DEPENDS:STATIC=general;/software/cuda-9.1/lib64/libcudart_static.a;general;-lpthread;general;dl;general;/usr/lib/x86_64-linux-gnu/librt.so;general;/software/cuda-9.1/lib64/libcublas.so;general;cudnn;general;-lcurand;
+
+
+########################
+# INTERNAL cache entries
+########################
+
+//ADVANCED property for variable: CMAKE_AR
+CMAKE_AR-ADVANCED:INTERNAL=1
+//This is the directory where this CMakeCache.txt was created
+CMAKE_CACHEFILE_DIR:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+//Major version of cmake used to create the current loaded cache
+CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
+//Minor version of cmake used to create the current loaded cache
+CMAKE_CACHE_MINOR_VERSION:INTERNAL=5
+//Patch version of cmake used to create the current loaded cache
+CMAKE_CACHE_PATCH_VERSION:INTERNAL=1
+//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE
+CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
+//Path to CMake executable.
+CMAKE_COMMAND:INTERNAL=/usr/bin/cmake
+//Path to cpack program executable.
+CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack
+//Path to ctest program executable.
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest
+//ADVANCED property for variable: CMAKE_CXX_COMPILER
+CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS
+CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG
+CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL
+CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE
+CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
+CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER
+CMAKE_C_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS
+CMAKE_C_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG
+CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL
+CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE
+CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO
+CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Executable file format
+CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS
+CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
+CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
+CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS
+CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1
+//Name of external makefile project generator.
+CMAKE_EXTRA_GENERATOR:INTERNAL=
+//Name of generator.
+CMAKE_GENERATOR:INTERNAL=Unix Makefiles
+//Name of generator platform.
+CMAKE_GENERATOR_PLATFORM:INTERNAL=
+//Name of generator toolset.
+CMAKE_GENERATOR_TOOLSET:INTERNAL=
+//Have symbol pthread_create
+CMAKE_HAVE_LIBC_CREATE:INTERNAL=
+//Have library pthreads
+CMAKE_HAVE_PTHREADS_CREATE:INTERNAL=
+//Have library pthread
+CMAKE_HAVE_PTHREAD_CREATE:INTERNAL=1
+//Have include pthread.h
+CMAKE_HAVE_PTHREAD_H:INTERNAL=1
+//Source directory with the top level CMakeLists.txt file for this
+// project
+CMAKE_HOME_DIRECTORY:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+//Install .so files without execute permission.
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1
+//ADVANCED property for variable: CMAKE_LINKER
+CMAKE_LINKER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
+CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS
+CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
+CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
+CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_NM
+CMAKE_NM-ADVANCED:INTERNAL=1
+//number of local generators
+CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1
+//ADVANCED property for variable: CMAKE_OBJCOPY
+CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_OBJDUMP
+CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_RANLIB
+CMAKE_RANLIB-ADVANCED:INTERNAL=1
+//Path to CMake installation.
+CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.5
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
+CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
+CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH
+CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_RPATH
+CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS
+CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG
+CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE
+CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STRIP
+CMAKE_STRIP-ADVANCED:INTERNAL=1
+//uname command
+CMAKE_UNAME:INTERNAL=/bin/uname
+//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE
+CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_64_BIT_DEVICE_CODE
+CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1
+//List of intermediate files that are part of the cuda dependency
+// scanning.
+CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend
+//ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
+CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_BUILD_CUBIN
+CUDA_BUILD_CUBIN-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_BUILD_EMULATION
+CUDA_BUILD_EMULATION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_CUDART_LIBRARY
+CUDA_CUDART_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_CUDA_LIBRARY
+CUDA_CUDA_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_GENERATED_OUTPUT_DIR
+CUDA_GENERATED_OUTPUT_DIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_HOST_COMPILATION_CPP
+CUDA_HOST_COMPILATION_CPP-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_EXECUTABLE
+CUDA_NVCC_EXECUTABLE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS
+CUDA_NVCC_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_DEBUG
+CUDA_NVCC_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_MINSIZEREL
+CUDA_NVCC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELEASE
+CUDA_NVCC_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELWITHDEBINFO
+CUDA_NVCC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_PROPAGATE_HOST_FLAGS
+CUDA_PROPAGATE_HOST_FLAGS-ADVANCED:INTERNAL=1
+//This is the value of the last time CUDA_SDK_ROOT_DIR was set
+// successfully.
+CUDA_SDK_ROOT_DIR_INTERNAL:INTERNAL=CUDA_SDK_ROOT_DIR-NOTFOUND
+//ADVANCED property for variable: CUDA_SEPARABLE_COMPILATION
+CUDA_SEPARABLE_COMPILATION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_TARGET_CPU_ARCH
+CUDA_TARGET_CPU_ARCH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_TOOLKIT_INCLUDE
+CUDA_TOOLKIT_INCLUDE-ADVANCED:INTERNAL=1
+//This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was
+// set successfully.
+CUDA_TOOLKIT_ROOT_DIR_INTERNAL:INTERNAL=/software/cuda-9.1
+//ADVANCED property for variable: CUDA_TOOLKIT_TARGET_DIR
+CUDA_TOOLKIT_TARGET_DIR-ADVANCED:INTERNAL=1
+//This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was
+// set successfully.
+CUDA_TOOLKIT_TARGET_DIR_INTERNAL:INTERNAL=/software/cuda-9.1
+//ADVANCED property for variable: CUDA_VERBOSE_BUILD
+CUDA_VERBOSE_BUILD-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_VERSION
+CUDA_VERSION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cublas_LIBRARY
+CUDA_cublas_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cudart_static_LIBRARY
+CUDA_cudart_static_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cufft_LIBRARY
+CUDA_cufft_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cupti_LIBRARY
+CUDA_cupti_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_curand_LIBRARY
+CUDA_curand_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cusolver_LIBRARY
+CUDA_cusolver_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cusparse_LIBRARY
+CUDA_cusparse_LIBRARY-ADVANCED:INTERNAL=1
+//Location of make2cmake.cmake
+CUDA_make2cmake:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/make2cmake.cmake
+//ADVANCED property for variable: CUDA_nppc_LIBRARY
+CUDA_nppc_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppi_LIBRARY
+CUDA_nppi_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_npps_LIBRARY
+CUDA_npps_LIBRARY-ADVANCED:INTERNAL=1
+//Location of parse_cubin.cmake
+CUDA_parse_cubin:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/parse_cubin.cmake
+//Location of run_nvcc.cmake
+CUDA_run_nvcc:INTERNAL=/usr/share/cmake-3.5/Modules/FindCUDA/run_nvcc.cmake
+//Details about finding CUDA
+FIND_PACKAGE_MESSAGE_DETAILS_CUDA:INTERNAL=[/software/cuda-9.1][/software/cuda-9.1/bin/nvcc][/software/cuda-9.1/include][/software/cuda-9.1/lib64/libcudart.so][v9.1(6.5)]
+//Details about finding Threads
+FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()]
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..f40522e627a66ddca0a1b7c75b83836d5e12e77a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCCompiler.cmake
@@ -0,0 +1,67 @@
+set(CMAKE_C_COMPILER "/usr/bin/cc")  # Recorded C toolchain description starts here: compiler path, identity, features, tools, ABI, implicit link info.
+set(CMAKE_C_COMPILER_ARG1 "")
+set(CMAKE_C_COMPILER_ID "GNU")
+set(CMAKE_C_COMPILER_VERSION "5.4.0")
+set(CMAKE_C_COMPILER_WRAPPER "")
+set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "11")
+set(CMAKE_C_COMPILE_FEATURES "c_function_prototypes;c_restrict;c_variadic_macros;c_static_assert")  # union of the C90/C99/C11 lists below
+set(CMAKE_C90_COMPILE_FEATURES "c_function_prototypes")
+set(CMAKE_C99_COMPILE_FEATURES "c_restrict;c_variadic_macros")
+set(CMAKE_C11_COMPILE_FEATURES "c_static_assert")
+
+set(CMAKE_C_PLATFORM_ID "Linux")
+set(CMAKE_C_SIMULATE_ID "")
+set(CMAKE_C_SIMULATE_VERSION "")
+
+set(CMAKE_AR "/usr/bin/ar")  # absolute, host-specific tool paths (also set in the CXX counterpart of this file)
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_COMPILER_IS_GNUCC 1)
+set(CMAKE_C_COMPILER_LOADED 1)
+set(CMAKE_C_COMPILER_WORKS TRUE)
+set(CMAKE_C_ABI_COMPILED TRUE)
+set(CMAKE_COMPILER_IS_MINGW )  # called with no value argument
+set(CMAKE_COMPILER_IS_CYGWIN )  # called with no value argument
+if(CMAKE_COMPILER_IS_CYGWIN)
+  set(CYGWIN 1)
+  set(UNIX 1)
+endif()
+
+set(CMAKE_C_COMPILER_ENV_VAR "CC")
+
+if(CMAKE_COMPILER_IS_MINGW)
+  set(MINGW 1)
+endif()
+set(CMAKE_C_COMPILER_ID_RUN 1)
+set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
+set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
+set(CMAKE_C_LINKER_PREFERENCE 10)
+
+# Save compiler ABI information: data-pointer size, ABI name and library architecture.
+set(CMAKE_C_SIZEOF_DATA_PTR "8")
+set(CMAKE_C_COMPILER_ABI "ELF")
+set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+
+if(CMAKE_C_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")  # language-neutral copy of the C value
+endif()
+
+if(CMAKE_C_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")  # language-neutral copy of the C value
+endif()
+
+if(CMAKE_C_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")  # literal here; same text as CMAKE_C_LIBRARY_ARCHITECTURE above
+endif()
+
+set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)  # the prefix was just set to an empty string on the previous line
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
+endif()
+
+
+
+
+set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "c")  # implicit link libraries / directories recorded for C
+set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib")
+set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..013ee9298fb861e7d0350d49a1fc08c0274b5e59
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeCXXCompiler.cmake
@@ -0,0 +1,68 @@
+set(CMAKE_CXX_COMPILER "/usr/bin/c++")  # Recorded C++ toolchain description starts here: compiler path, identity, features, tools, ABI, implicit link info.
+set(CMAKE_CXX_COMPILER_ARG1 "")
+set(CMAKE_CXX_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILER_VERSION "5.4.0")
+set(CMAKE_CXX_COMPILER_WRAPPER "")
+set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "98")
+set(CMAKE_CXX_COMPILE_FEATURES "cxx_template_template_parameters;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
+set(CMAKE_CXX98_COMPILE_FEATURES "cxx_template_template_parameters")
+set(CMAKE_CXX11_COMPILE_FEATURES "cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
+set(CMAKE_CXX14_COMPILE_FEATURES "cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
+
+set(CMAKE_CXX_PLATFORM_ID "Linux")
+set(CMAKE_CXX_SIMULATE_ID "")
+set(CMAKE_CXX_SIMULATE_VERSION "")
+
+set(CMAKE_AR "/usr/bin/ar")  # absolute, host-specific tool paths (also set in the C counterpart of this file)
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_COMPILER_IS_GNUCXX 1)
+set(CMAKE_CXX_COMPILER_LOADED 1)
+set(CMAKE_CXX_COMPILER_WORKS TRUE)
+set(CMAKE_CXX_ABI_COMPILED TRUE)
+set(CMAKE_COMPILER_IS_MINGW )  # called with no value argument
+set(CMAKE_COMPILER_IS_CYGWIN )  # called with no value argument
+if(CMAKE_COMPILER_IS_CYGWIN)
+  set(CYGWIN 1)
+  set(UNIX 1)
+endif()
+
+set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
+
+if(CMAKE_COMPILER_IS_MINGW)
+  set(MINGW 1)
+endif()
+set(CMAKE_CXX_COMPILER_ID_RUN 1)
+set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
+set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;mm;CPP)
+set(CMAKE_CXX_LINKER_PREFERENCE 30)
+set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
+
+# Save compiler ABI information: data-pointer size, ABI name and library architecture.
+set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
+set(CMAKE_CXX_COMPILER_ABI "ELF")
+set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+
+if(CMAKE_CXX_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")  # language-neutral copy of the C++ value
+endif()
+
+if(CMAKE_CXX_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")  # language-neutral copy of the C++ value
+endif()
+
+if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")  # literal here; same text as CMAKE_CXX_LIBRARY_ARCHITECTURE above
+endif()
+
+set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)  # the prefix was just set to an empty string on the previous line
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
+endif()
+
+
+
+
+set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;c")  # implicit link libraries / directories recorded for C++
+set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib")
+set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin
new file mode 100755
index 0000000000000000000000000000000000000000..8fadb3a4377be9080de10797e815ab49faade975
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_C.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin
new file mode 100755
index 0000000000000000000000000000000000000000..f89cba0f5e50283f60499d801dbc6711babc578c
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeDetermineCompilerABI_CXX.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..daf4b4c75bfa1a5e36628a6e7f99442486dce87d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CMakeSystem.cmake
@@ -0,0 +1,15 @@
+set(CMAKE_HOST_SYSTEM_NAME "Linux")
+set(CMAKE_HOST_SYSTEM_VERSION "4.15.0-36-generic")
+set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
+set(CMAKE_HOST_SYSTEM "${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_VERSION}")
+
+# Target system description; the composite name is built as <name>-<version>.
+
+set(CMAKE_SYSTEM_NAME "Linux")
+set(CMAKE_SYSTEM_VERSION "4.15.0-36-generic")
+set(CMAKE_SYSTEM_PROCESSOR "x86_64")
+set(CMAKE_SYSTEM "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_VERSION}")
+
+set(CMAKE_CROSSCOMPILING FALSE)
+
+set(CMAKE_SYSTEM_LOADED 1)
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c
new file mode 100644
index 0000000000000000000000000000000000000000..570a15e994e4f10ca4a05b4451ea350fb942337f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/CMakeCCompilerId.c
@@ -0,0 +1,544 @@
+#ifdef __cplusplus /* This probe is C-only: stop the build if it is being compiled as C++. */
+# error "A C++ compiler has been selected for C."
+#endif
+/* When __18CXX is defined, select the `void main()` entry point used at the end of this file. */
+#if defined(__18CXX)
+# define ID_VOID_MAIN
+#endif
+
+
+/* Compiler identification: the first matching branch below defines COMPILER_ID (plus COMPILER_VERSION_* and SIMULATE_* where shown).
+   Legend for the layout comments: V=Version, R=Revision, P=Patch; version dates use YYYY=Year, MM=Month, DD=Day.  */
+
+#if defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+  /* __INTEL_COMPILER = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+# if defined(__INTEL_COMPILER_UPDATE)
+#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+# else
+#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+  /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__SUNPRO_C)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_C >= 0x5100
+   /* __SUNPRO_C = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# else
+   /* __SUNPRO_C = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# endif
+
+#elif defined(__HP_cc)
+# define COMPILER_ID "HP"
+  /* __HP_cc = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_cc     % 100)
+
+#elif defined(__DECC)
+# define COMPILER_ID "Compaq"
+  /* __DECC_VER = VVRRTPPPP */
+# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECC_VER         % 10000)
+
+#elif defined(__IBMC__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+
+#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version)
+# define COMPILER_ID "Fujitsu" /* id only: no version components are defined in this branch */
+
+#elif defined(__TINYC__)
+# define COMPILER_ID "TinyCC" /* id only: no version components are defined in this branch */
+
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO" /* id only: no version components are defined in this branch */
+
+#elif defined(__clang__) && defined(__apple_build_version__)
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+
+#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+# define COMPILER_ID "ADSP"
+#if defined(__VISUALDSPVERSION__)
+  /* __VISUALDSPVERSION__ = 0xVVRRPP00 */
+# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24)
+# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF)
+# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8  & 0xFF)
+#endif
+
+#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+
+#elif defined(__ARMCC_VERSION)
+# define COMPILER_ID "ARMCC"
+#if __ARMCC_VERSION >= 1000000
+  /* __ARMCC_VERSION = VRRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION     % 10000)
+#else
+  /* __ARMCC_VERSION = VRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION    % 10000)
+#endif
+
+
+#elif defined(SDCC)
+# define COMPILER_ID "SDCC"
+  /* SDCC = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(SDCC/100)
+#  define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(SDCC    % 10)
+
+#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION)
+# define COMPILER_ID "MIPSpro"
+# if defined(_SGI_COMPILER_VERSION)
+  /* _SGI_COMPILER_VERSION = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(_SGI_COMPILER_VERSION/100)
+#  define COMPILER_VERSION_MINOR DEC(_SGI_COMPILER_VERSION/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(_SGI_COMPILER_VERSION    % 10)
+# else
+  /* _COMPILER_VERSION = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(_COMPILER_VERSION/100)
+#  define COMPILER_VERSION_MINOR DEC(_COMPILER_VERSION/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(_COMPILER_VERSION    % 10)
+# endif
+
+
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__sgi)
+# define COMPILER_ID "MIPSpro"
+
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+#ifdef SIMULATE_ID
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif
+/* Extra marker string, present only when __QNXNTO__ is defined. */
+#ifdef __QNXNTO__
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif
+/* Compiler-wrapper marker, present only when __CRAYXE or __CRAYXC is defined. */
+#if defined(__CRAYXE) || defined(__CRAYXC)
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif
+
+#define STRINGIFY_HELPER(X) #X /* turns the argument text into a string literal as written */
+#define STRINGIFY(X) STRINGIFY_HELPER(X) /* extra level so a macro argument is expanded before it is stringified */
+
+/* Identify known platforms by name: the first matching branch defines PLATFORM_ID; it is the empty string when nothing matches.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__sgi) || defined(__sgi__) || defined(_SGI)
+# define PLATFORM_ID "IRIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#elif defined(__WATCOMC__)
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+
+# else /* unknown platform */
+#  define PLATFORM_ID ""
+# endif
+
+#else /* unknown platform */
+# define PLATFORM_ID ""
+
+#endif
+
+/* For Windows compilers MSVC and Intel we can determine
+   the architecture of the compiler being used, because these
+   compilers have no flags that change the architecture; it depends
+   on which compiler is being used.  Watcom is also checked below;
+   every other compiler gets an empty ARCHITECTURE_ID.  */
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
+#  endif
+
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#else
+#  define ARCHITECTURE_ID ""
+#endif
+
+/* Convert integer to decimal digit literals: expands to eight comma-separated character constants, most significant digit first, zero-padded.  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+
+/* Convert integer to hex digit literals: eight character constants, one per 4-bit nibble from bits 31..28 down to 3..0; each is '0' plus the nibble, so nibbles above 9 are not mapped to letters here.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+
+/* Build the compiler_version info entry as a char array from the DEC/HEX digit lists: MAJOR always, then MINOR, PATCH and TWEAK only when defined (each nested under the previous one). */
+#ifdef COMPILER_VERSION_MAJOR
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Build the simulate_version info entry (version of the simulated compiler) as a char array: MAJOR always, then MINOR, PATCH and TWEAK only when defined (each nested under the previous one). */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; /* PLATFORM_ID comes from the platform #if chain above */
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; /* ARCHITECTURE_ID comes from the architecture #if chain above */
+
+
+
+
+const char* info_language_dialect_default = "INFO" ":" "dialect_default[" /* dialect text is chosen from __STDC_VERSION__ below */
+#if !defined(__STDC_VERSION__)
+  "90"
+#elif __STDC_VERSION__ >= 201000L
+  "11"
+#elif __STDC_VERSION__ >= 199901L
+  "99"
+#else /* __STDC_VERSION__ defined but below 199901L: no dialect text is inserted */
+#endif
+"]";
+
+/*--------------------------------------------------------------------------*/
+/* Entry point: adds one character of every info string, indexed by argc, into the return value (presumably so each string stays referenced in the binary -- confirm). */
+#ifdef ID_VOID_MAIN
+void main() {}
+#else
+int main(int argc, char* argv[])
+{
+  int require = 0;
+  require += info_compiler[argc];
+  require += info_platform[argc];
+  require += info_arch[argc];
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXE) || defined(__CRAYXC)
+  require += info_cray[argc];
+#endif
+  require += info_language_dialect_default[argc];
+  (void)argv; /* argv is deliberately unused */
+  return require;
+}
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out
new file mode 100755
index 0000000000000000000000000000000000000000..afc42a94bbc7371ac5a573a3b9eb6b0812ecca21
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6d853637c6f7637dd8672b59612a9263a4d0244
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/CMakeCXXCompilerId.cpp
@@ -0,0 +1,533 @@
+/* Keep the .cpp extension on this file: every C++ compiler accepts it
+   without extra flags, whereas .cxx is not recognized everywhere (Borland,
+   for example, does not know it).  */
+#if !defined(__cplusplus)
+# error "A C compiler has been selected for C++."
+#endif
+
+
+/* Version number components: V=Version, R=Revision, P=Patch
+   Version date components:   YYYY=Year, MM=Month,   DD=Day  */
+
+#if defined(__COMO__)
+# define COMPILER_ID "Comeau"
+  /* __COMO_VERSION__ = VRR */
+# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100)
+# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100)
+
+#elif defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+  /* __INTEL_COMPILER = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+# if defined(__INTEL_COMPILER_UPDATE)
+#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+# else
+#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+  /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__SUNPRO_CC)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_CC >= 0x5100
+   /* __SUNPRO_CC = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# else
+   /* __SUNPRO_CC = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# endif
+
+#elif defined(__HP_aCC)
+# define COMPILER_ID "HP"
+  /* __HP_aCC = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_aCC     % 100)
+
+#elif defined(__DECCXX)
+# define COMPILER_ID "Compaq"
+  /* __DECCXX_VER = VVRRTPPPP */
+# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER         % 10000)
+
+#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+
+#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version)
+# define COMPILER_ID "Fujitsu"
+
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO"
+
+#elif defined(__clang__) && defined(__apple_build_version__)
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+
+#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+# define COMPILER_ID "ADSP"
+# ifdef __VISUALDSPVERSION__
+   /* Packed hex version word 0xVVRRPP00; the low byte is not used here */
+#  define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__ >> 24)
+#  define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__ >> 16 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__ >> 8 & 0xFF)
+# endif
+
+#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+
+#elif defined(__ARMCC_VERSION)
+# define COMPILER_ID "ARMCC"
+# if __ARMCC_VERSION >= 1000000
+   /* Seven-digit layout: __ARMCC_VERSION = VRRPPPP */
+#  define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION / 1000000)
+#  define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION / 10000 % 100)
+#  define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
+# else
+   /* Six-digit layout: __ARMCC_VERSION = VRPPPP */
+#  define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION / 100000)
+#  define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION / 10000 % 10)
+#  define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
+# endif
+
+
+#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION)
+# define COMPILER_ID "MIPSpro"
+# if defined(_SGI_COMPILER_VERSION)
+  /* _SGI_COMPILER_VERSION = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(_SGI_COMPILER_VERSION/100)
+#  define COMPILER_VERSION_MINOR DEC(_SGI_COMPILER_VERSION/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(_SGI_COMPILER_VERSION    % 10)
+# else
+  /* _COMPILER_VERSION = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(_COMPILER_VERSION/100)
+#  define COMPILER_VERSION_MINOR DEC(_COMPILER_VERSION/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(_COMPILER_VERSION    % 10)
+# endif
+
+
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__sgi)
+# define COMPILER_ID "MIPSpro"
+
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+#ifdef SIMULATE_ID
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif
+
+#ifdef __QNXNTO__
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif
+
+#if defined(__CRAYXE) || defined(__CRAYXC)
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif
+
+#define STRINGIFY_HELPER(X) #X
+#define STRINGIFY(X) STRINGIFY_HELPER(X)
+
+/* Identify known platforms by name.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__sgi) || defined(__sgi__) || defined(_SGI)
+# define PLATFORM_ID "IRIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#elif defined(__WATCOMC__)
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+
+# else /* unknown platform */
+#  define PLATFORM_ID ""
+# endif
+
+#else /* unknown platform */
+# define PLATFORM_ID ""
+
+#endif
+
+/* For the Windows compilers MSVC and Intel we can determine the
+   architecture being targeted.  These compilers have no flag that
+   changes the target architecture; it is fixed by which compiler
+   executable is being used.
+*/
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
+#  endif
+
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#else
+#  define ARCHITECTURE_ID ""
+#endif
+
+/* Expand n into eight comma-separated '0'+digit expressions, most significant decimal digit (10^7 place) first.  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+
+/* Expand n into eight comma-separated '0'+nibble expressions, top nibble (bits 28-31) first; nibbles above 9 are not mapped to A-F.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+
+/* Build the NUL-terminated char array "INFO:compiler_version[MAJOR.MINOR.PATCH.TWEAK]"; a component is emitted only when it and every earlier one is defined. */
+#ifdef COMPILER_VERSION_MAJOR
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Build the NUL-terminated char array "INFO:simulate_version[MAJOR.MINOR.PATCH.TWEAK]" from the SIMULATE_VERSION_* components; a component is emitted only when it and every earlier one is defined. */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
+
+
+
+
+const char* info_language_dialect_default = "INFO" ":" "dialect_default["
+#if __cplusplus < 201103L   /* older than C++11 */
+  "98"
+#elif __cplusplus < 201402L /* C++11 up to, but not including, C++14 */
+  "11"
+#else                       /* C++14 or newer */
+  "14"
+#endif
+"]";
+
+/*--------------------------------------------------------------------------*/
+/* Index every INFO string with argc and return the sum of those characters. */
+int main(int argc, char* argv[])
+{
+  int require = info_compiler[argc]; /* seeds the sum; same as 0 + this */
+  require += info_platform[argc];
+  require += info_arch[argc]; /* previously the only INFO string left unreferenced */
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXE) || defined(__CRAYXC)
+  require += info_cray[argc];
+#endif
+  require += info_language_dialect_default[argc];
+  (void)argv;
+  return require;
+}
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out
new file mode 100755
index 0000000000000000000000000000000000000000..648b86701f00871e7148b5f6fcce25d049005826
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..289a9a79806014b6aeb5fd1006ec61b9b84ad4f0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeDirectoryInformation.cmake
@@ -0,0 +1,16 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Relative path conversion top directories.
+set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt")
+set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build")
+
+# Force unix paths in dependencies.
+set(CMAKE_FORCE_UNIX_PATHS 1)
+
+
+# The C and CXX include file regular expressions for this directory.
+set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
+set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
+set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
+set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log
new file mode 100644
index 0000000000000000000000000000000000000000..df25b3106d9fa8bd2bb0165b31ac5a59ed0270cb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeError.log
@@ -0,0 +1,55 @@
+Determining if the pthread_create exist failed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_4a9d8/fast"
+/usr/bin/make -f CMakeFiles/cmTC_4a9d8.dir/build.make CMakeFiles/cmTC_4a9d8.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o
+/usr/bin/cc    -fPIC    -o CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckSymbolExists.c
+Linking C executable cmTC_4a9d8
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4a9d8.dir/link.txt --verbose=1
+/usr/bin/cc  -fPIC     CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o  -o cmTC_4a9d8 -rdynamic 
+CMakeFiles/cmTC_4a9d8.dir/CheckSymbolExists.c.o: In function `main':
+CheckSymbolExists.c:(.text+0x1b): undefined reference to `pthread_create'
+collect2: error: ld returned 1 exit status
+CMakeFiles/cmTC_4a9d8.dir/build.make:97: recipe for target 'cmTC_4a9d8' failed
+make[1]: *** [cmTC_4a9d8] Error 1
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Makefile:126: recipe for target 'cmTC_4a9d8/fast' failed
+make: *** [cmTC_4a9d8/fast] Error 2
+
+File /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckSymbolExists.c:
+/* */
+#include <pthread.h>
+
+int main(int argc, char** argv)
+{
+  (void)argv;
+#ifndef pthread_create
+  return ((int*)(&pthread_create))[argc];
+#else
+  (void)argc;
+  return 0;
+#endif
+}
+
+Determining if the function pthread_create exists in the pthreads failed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_1413b/fast"
+/usr/bin/make -f CMakeFiles/cmTC_1413b.dir/build.make CMakeFiles/cmTC_1413b.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o
+/usr/bin/cc    -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create   -o CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o   -c /usr/share/cmake-3.5/Modules/CheckFunctionExists.c
+Linking C executable cmTC_1413b
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_1413b.dir/link.txt --verbose=1
+/usr/bin/cc  -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create    CMakeFiles/cmTC_1413b.dir/CheckFunctionExists.c.o  -o cmTC_1413b -rdynamic -lpthreads 
+/usr/bin/ld: cannot find -lpthreads
+collect2: error: ld returned 1 exit status
+CMakeFiles/cmTC_1413b.dir/build.make:97: recipe for target 'cmTC_1413b' failed
+make[1]: *** [cmTC_1413b] Error 1
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Makefile:126: recipe for target 'cmTC_1413b/fast' failed
+make: *** [cmTC_1413b/fast] Error 2
+
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log
new file mode 100644
index 0000000000000000000000000000000000000000..8c55cb7b351b2f10aa1430b64ec906416b12a072
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeOutput.log
@@ -0,0 +1,584 @@
+The system is: Linux - 4.15.0-36-generic - x86_64
+Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded.
+Compiler: /usr/bin/cc 
+Build flags: 
+Id flags: 
+
+The output was:
+0
+
+
+Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out"
+
+The C compiler identification is GNU, found in "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdC/a.out"
+
+Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded.
+Compiler: /usr/bin/c++ 
+Build flags: 
+Id flags: 
+
+The output was:
+0
+
+
+Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out"
+
+The CXX compiler identification is GNU, found in "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/3.5.1/CompilerIdCXX/a.out"
+
+Determining if the C compiler works passed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_f2cd6/fast"
+/usr/bin/make -f CMakeFiles/cmTC_f2cd6.dir/build.make CMakeFiles/cmTC_f2cd6.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o
+/usr/bin/cc     -o CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/testCCompiler.c
+Linking C executable cmTC_f2cd6
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_f2cd6.dir/link.txt --verbose=1
+/usr/bin/cc       CMakeFiles/cmTC_f2cd6.dir/testCCompiler.c.o  -o cmTC_f2cd6 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+Detecting C compiler ABI info compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_2ea18/fast"
+/usr/bin/make -f CMakeFiles/cmTC_2ea18.dir/build.make CMakeFiles/cmTC_2ea18.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o
+/usr/bin/cc     -o CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o   -c /usr/share/cmake-3.5/Modules/CMakeCCompilerABI.c
+Linking C executable cmTC_2ea18
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2ea18.dir/link.txt --verbose=1
+/usr/bin/cc      -v CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o  -o cmTC_2ea18 -rdynamic  
+Using built-in specs.
+COLLECT_GCC=/usr/bin/cc
+COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
+Thread model: posix
+gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) 
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_2ea18' '-rdynamic' '-mtune=generic' '-march=x86-64'
+ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/cc95kyZ3.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_2ea18 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+Parsed C implicit link information from above output:
+  link line regex: [^( *|.*[/\])(ld|([^/\]+-)?ld|collect2)[^/\]*( |$)]
+  ignore line: [Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp]
+  ignore line: []
+  ignore line: [Run Build Command:"/usr/bin/make" "cmTC_2ea18/fast"]
+  ignore line: [/usr/bin/make -f CMakeFiles/cmTC_2ea18.dir/build.make CMakeFiles/cmTC_2ea18.dir/build]
+  ignore line: [make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp']
+  ignore line: [Building C object CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o]
+  ignore line: [/usr/bin/cc     -o CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o   -c /usr/share/cmake-3.5/Modules/CMakeCCompilerABI.c]
+  ignore line: [Linking C executable cmTC_2ea18]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2ea18.dir/link.txt --verbose=1]
+  ignore line: [/usr/bin/cc      -v CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o  -o cmTC_2ea18 -rdynamic  ]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/cc]
+  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu]
+  ignore line: [Thread model: posix]
+  ignore line: [gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) ]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_2ea18' '-rdynamic' '-mtune=generic' '-march=x86-64']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/cc95kyZ3.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_2ea18 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/collect2] ==> ignore
+    arg [-plugin] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so] ==> ignore
+    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/cc95kyZ3.res] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [--sysroot=/] ==> ignore
+    arg [--build-id] ==> ignore
+    arg [--eh-frame-hdr] ==> ignore
+    arg [-m] ==> ignore
+    arg [elf_x86_64] ==> ignore
+    arg [--hash-style=gnu] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-export-dynamic] ==> ignore
+    arg [-dynamic-linker] ==> ignore
+    arg [/lib64/ld-linux-x86-64.so.2] ==> ignore
+    arg [-zrelro] ==> ignore
+    arg [-o] ==> ignore
+    arg [cmTC_2ea18] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o] ==> ignore
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib]
+    arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu]
+    arg [-L/lib/../lib] ==> dir [/lib/../lib]
+    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
+    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
+    arg [-L.] ==> ignore
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..]
+    arg [CMakeFiles/cmTC_2ea18.dir/CMakeCCompilerABI.c.o] ==> ignore
+    arg [-lgcc] ==> lib [gcc]
+    arg [--as-needed] ==> ignore
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [--no-as-needed] ==> ignore
+    arg [-lc] ==> lib [c]
+    arg [-lgcc] ==> lib [gcc]
+    arg [--as-needed] ==> ignore
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [--no-as-needed] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtend.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] ==> ignore
+  remove lib [gcc]
+  remove lib [gcc_s]
+  remove lib [gcc]
+  remove lib [gcc_s]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5] ==> [/usr/lib/gcc/x86_64-linux-gnu/5]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> [/usr/lib]
+  collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu]
+  collapse library dir [/lib/../lib] ==> [/lib]
+  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/../lib] ==> [/usr/lib]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> [/usr/lib]
+  implicit libs: [c]
+  implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib]
+  implicit fwks: []
+
+
+
+
+Detecting C [-std=c11] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_fe354/fast"
+/usr/bin/make -f CMakeFiles/cmTC_fe354.dir/build.make CMakeFiles/cmTC_fe354.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_fe354.dir/feature_tests.c.o
+/usr/bin/cc    -std=c11 -o CMakeFiles/cmTC_fe354.dir/feature_tests.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c
+Linking C executable cmTC_fe354
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_fe354.dir/link.txt --verbose=1
+/usr/bin/cc       CMakeFiles/cmTC_fe354.dir/feature_tests.c.o  -o cmTC_fe354 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: C_FEATURE:1c_function_prototypes
+    Feature record: C_FEATURE:1c_restrict
+    Feature record: C_FEATURE:1c_static_assert
+    Feature record: C_FEATURE:1c_variadic_macros
+
+
+Detecting C [-std=c99] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_fefa5/fast"
+/usr/bin/make -f CMakeFiles/cmTC_fefa5.dir/build.make CMakeFiles/cmTC_fefa5.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o
+/usr/bin/cc    -std=c99 -o CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c
+Linking C executable cmTC_fefa5
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_fefa5.dir/link.txt --verbose=1
+/usr/bin/cc       CMakeFiles/cmTC_fefa5.dir/feature_tests.c.o  -o cmTC_fefa5 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: C_FEATURE:1c_function_prototypes
+    Feature record: C_FEATURE:1c_restrict
+    Feature record: C_FEATURE:0c_static_assert
+    Feature record: C_FEATURE:1c_variadic_macros
+
+
+Detecting C [-std=c90] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_1c31f/fast"
+/usr/bin/make -f CMakeFiles/cmTC_1c31f.dir/build.make CMakeFiles/cmTC_1c31f.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o
+/usr/bin/cc    -std=c90 -o CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c
+Linking C executable cmTC_1c31f
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_1c31f.dir/link.txt --verbose=1
+/usr/bin/cc       CMakeFiles/cmTC_1c31f.dir/feature_tests.c.o  -o cmTC_1c31f -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: C_FEATURE:1c_function_prototypes
+    Feature record: C_FEATURE:0c_restrict
+    Feature record: C_FEATURE:0c_static_assert
+    Feature record: C_FEATURE:0c_variadic_macros
+Determining if the CXX compiler works passed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_81321/fast"
+/usr/bin/make -f CMakeFiles/cmTC_81321.dir/build.make CMakeFiles/cmTC_81321.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o
+/usr/bin/c++      -o CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/testCXXCompiler.cxx
+Linking CXX executable cmTC_81321
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_81321.dir/link.txt --verbose=1
+/usr/bin/c++        CMakeFiles/cmTC_81321.dir/testCXXCompiler.cxx.o  -o cmTC_81321 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+Detecting CXX compiler ABI info compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_c70a3/fast"
+/usr/bin/make -f CMakeFiles/cmTC_c70a3.dir/build.make CMakeFiles/cmTC_c70a3.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o
+/usr/bin/c++      -o CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.5/Modules/CMakeCXXCompilerABI.cpp
+Linking CXX executable cmTC_c70a3
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c70a3.dir/link.txt --verbose=1
+/usr/bin/c++       -v CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o  -o cmTC_c70a3 -rdynamic  
+Using built-in specs.
+COLLECT_GCC=/usr/bin/c++
+COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
+Thread model: posix
+gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) 
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c70a3' '-rdynamic' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
+ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/ccYtMczG.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_c70a3 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+Parsed CXX implicit link information from above output:
+  link line regex: [^( *|.*[/\])(ld|([^/\]+-)?ld|collect2)[^/\]*( |$)]
+  ignore line: [Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp]
+  ignore line: []
+  ignore line: [Run Build Command:"/usr/bin/make" "cmTC_c70a3/fast"]
+  ignore line: [/usr/bin/make -f CMakeFiles/cmTC_c70a3.dir/build.make CMakeFiles/cmTC_c70a3.dir/build]
+  ignore line: [make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp']
+  ignore line: [Building CXX object CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o]
+  ignore line: [/usr/bin/c++      -o CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.5/Modules/CMakeCXXCompilerABI.cpp]
+  ignore line: [Linking CXX executable cmTC_c70a3]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c70a3.dir/link.txt --verbose=1]
+  ignore line: [/usr/bin/c++       -v CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o  -o cmTC_c70a3 -rdynamic  ]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/c++]
+  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.10' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu]
+  ignore line: [Thread model: posix]
+  ignore line: [gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) ]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/5/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:./:/usr/lib/gcc/x86_64-linux-gnu/5/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c70a3' '-rdynamic' '-shared-libgcc' '-mtune=generic' '-march=x86-64']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/5/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper -plugin-opt=-fresolution=/tmp/ccYtMczG.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -export-dynamic -dynamic-linker /lib64/ld-linux-x86-64.so.2 -z relro -o cmTC_c70a3 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/5 -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L. -L/usr/lib/gcc/x86_64-linux-gnu/5/../../.. CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/collect2] ==> ignore
+    arg [-plugin] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/liblto_plugin.so] ==> ignore
+    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/ccYtMczG.res] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [--sysroot=/] ==> ignore
+    arg [--build-id] ==> ignore
+    arg [--eh-frame-hdr] ==> ignore
+    arg [-m] ==> ignore
+    arg [elf_x86_64] ==> ignore
+    arg [--hash-style=gnu] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-export-dynamic] ==> ignore
+    arg [-dynamic-linker] ==> ignore
+    arg [/lib64/ld-linux-x86-64.so.2] ==> ignore
+    arg [-zrelro] ==> ignore
+    arg [-o] ==> ignore
+    arg [cmTC_c70a3] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o] ==> ignore
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib]
+    arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu]
+    arg [-L/lib/../lib] ==> dir [/lib/../lib]
+    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
+    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
+    arg [-L.] ==> ignore
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..]
+    arg [CMakeFiles/cmTC_c70a3.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore
+    arg [-lstdc++] ==> lib [stdc++]
+    arg [-lm] ==> lib [m]
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [-lgcc] ==> lib [gcc]
+    arg [-lc] ==> lib [c]
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [-lgcc] ==> lib [gcc]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/crtend.o] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o] ==> ignore
+  remove lib [gcc_s]
+  remove lib [gcc]
+  remove lib [gcc_s]
+  remove lib [gcc]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5] ==> [/usr/lib/gcc/x86_64-linux-gnu/5]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../../../lib] ==> [/usr/lib]
+  collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu]
+  collapse library dir [/lib/../lib] ==> [/lib]
+  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/../lib] ==> [/usr/lib]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/5/../../..] ==> [/usr/lib]
+  implicit libs: [stdc++;m;c]
+  implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/5;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib]
+  implicit fwks: []
+
+
+
+
+Detecting CXX [-std=c++14] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_6a40d/fast"
+/usr/bin/make -f CMakeFiles/cmTC_6a40d.dir/build.make CMakeFiles/cmTC_6a40d.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o
+/usr/bin/c++     -std=c++14 -o CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx
+Linking CXX executable cmTC_6a40d
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_6a40d.dir/link.txt --verbose=1
+/usr/bin/c++        CMakeFiles/cmTC_6a40d.dir/feature_tests.cxx.o  -o cmTC_6a40d -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: CXX_FEATURE:1cxx_aggregate_default_initializers
+    Feature record: CXX_FEATURE:1cxx_alias_templates
+    Feature record: CXX_FEATURE:1cxx_alignas
+    Feature record: CXX_FEATURE:1cxx_alignof
+    Feature record: CXX_FEATURE:1cxx_attributes
+    Feature record: CXX_FEATURE:1cxx_attribute_deprecated
+    Feature record: CXX_FEATURE:1cxx_auto_type
+    Feature record: CXX_FEATURE:1cxx_binary_literals
+    Feature record: CXX_FEATURE:1cxx_constexpr
+    Feature record: CXX_FEATURE:1cxx_contextual_conversions
+    Feature record: CXX_FEATURE:1cxx_decltype
+    Feature record: CXX_FEATURE:1cxx_decltype_auto
+    Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types
+    Feature record: CXX_FEATURE:1cxx_default_function_template_args
+    Feature record: CXX_FEATURE:1cxx_defaulted_functions
+    Feature record: CXX_FEATURE:1cxx_defaulted_move_initializers
+    Feature record: CXX_FEATURE:1cxx_delegating_constructors
+    Feature record: CXX_FEATURE:1cxx_deleted_functions
+    Feature record: CXX_FEATURE:1cxx_digit_separators
+    Feature record: CXX_FEATURE:1cxx_enum_forward_declarations
+    Feature record: CXX_FEATURE:1cxx_explicit_conversions
+    Feature record: CXX_FEATURE:1cxx_extended_friend_declarations
+    Feature record: CXX_FEATURE:1cxx_extern_templates
+    Feature record: CXX_FEATURE:1cxx_final
+    Feature record: CXX_FEATURE:1cxx_func_identifier
+    Feature record: CXX_FEATURE:1cxx_generalized_initializers
+    Feature record: CXX_FEATURE:1cxx_generic_lambdas
+    Feature record: CXX_FEATURE:1cxx_inheriting_constructors
+    Feature record: CXX_FEATURE:1cxx_inline_namespaces
+    Feature record: CXX_FEATURE:1cxx_lambdas
+    Feature record: CXX_FEATURE:1cxx_lambda_init_captures
+    Feature record: CXX_FEATURE:1cxx_local_type_template_args
+    Feature record: CXX_FEATURE:1cxx_long_long_type
+    Feature record: CXX_FEATURE:1cxx_noexcept
+    Feature record: CXX_FEATURE:1cxx_nonstatic_member_init
+    Feature record: CXX_FEATURE:1cxx_nullptr
+    Feature record: CXX_FEATURE:1cxx_override
+    Feature record: CXX_FEATURE:1cxx_range_for
+    Feature record: CXX_FEATURE:1cxx_raw_string_literals
+    Feature record: CXX_FEATURE:1cxx_reference_qualified_functions
+    Feature record: CXX_FEATURE:1cxx_relaxed_constexpr
+    Feature record: CXX_FEATURE:1cxx_return_type_deduction
+    Feature record: CXX_FEATURE:1cxx_right_angle_brackets
+    Feature record: CXX_FEATURE:1cxx_rvalue_references
+    Feature record: CXX_FEATURE:1cxx_sizeof_member
+    Feature record: CXX_FEATURE:1cxx_static_assert
+    Feature record: CXX_FEATURE:1cxx_strong_enums
+    Feature record: CXX_FEATURE:1cxx_template_template_parameters
+    Feature record: CXX_FEATURE:1cxx_thread_local
+    Feature record: CXX_FEATURE:1cxx_trailing_return_types
+    Feature record: CXX_FEATURE:1cxx_unicode_literals
+    Feature record: CXX_FEATURE:1cxx_uniform_initialization
+    Feature record: CXX_FEATURE:1cxx_unrestricted_unions
+    Feature record: CXX_FEATURE:1cxx_user_literals
+    Feature record: CXX_FEATURE:1cxx_variable_templates
+    Feature record: CXX_FEATURE:1cxx_variadic_macros
+    Feature record: CXX_FEATURE:1cxx_variadic_templates
+
+
+Detecting CXX [-std=c++11] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_b98f2/fast"
+/usr/bin/make -f CMakeFiles/cmTC_b98f2.dir/build.make CMakeFiles/cmTC_b98f2.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o
+/usr/bin/c++     -std=c++11 -o CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx
+Linking CXX executable cmTC_b98f2
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_b98f2.dir/link.txt --verbose=1
+/usr/bin/c++        CMakeFiles/cmTC_b98f2.dir/feature_tests.cxx.o  -o cmTC_b98f2 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers
+    Feature record: CXX_FEATURE:1cxx_alias_templates
+    Feature record: CXX_FEATURE:1cxx_alignas
+    Feature record: CXX_FEATURE:1cxx_alignof
+    Feature record: CXX_FEATURE:1cxx_attributes
+    Feature record: CXX_FEATURE:0cxx_attribute_deprecated
+    Feature record: CXX_FEATURE:1cxx_auto_type
+    Feature record: CXX_FEATURE:0cxx_binary_literals
+    Feature record: CXX_FEATURE:1cxx_constexpr
+    Feature record: CXX_FEATURE:0cxx_contextual_conversions
+    Feature record: CXX_FEATURE:1cxx_decltype
+    Feature record: CXX_FEATURE:0cxx_decltype_auto
+    Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types
+    Feature record: CXX_FEATURE:1cxx_default_function_template_args
+    Feature record: CXX_FEATURE:1cxx_defaulted_functions
+    Feature record: CXX_FEATURE:1cxx_defaulted_move_initializers
+    Feature record: CXX_FEATURE:1cxx_delegating_constructors
+    Feature record: CXX_FEATURE:1cxx_deleted_functions
+    Feature record: CXX_FEATURE:0cxx_digit_separators
+    Feature record: CXX_FEATURE:1cxx_enum_forward_declarations
+    Feature record: CXX_FEATURE:1cxx_explicit_conversions
+    Feature record: CXX_FEATURE:1cxx_extended_friend_declarations
+    Feature record: CXX_FEATURE:1cxx_extern_templates
+    Feature record: CXX_FEATURE:1cxx_final
+    Feature record: CXX_FEATURE:1cxx_func_identifier
+    Feature record: CXX_FEATURE:1cxx_generalized_initializers
+    Feature record: CXX_FEATURE:0cxx_generic_lambdas
+    Feature record: CXX_FEATURE:1cxx_inheriting_constructors
+    Feature record: CXX_FEATURE:1cxx_inline_namespaces
+    Feature record: CXX_FEATURE:1cxx_lambdas
+    Feature record: CXX_FEATURE:0cxx_lambda_init_captures
+    Feature record: CXX_FEATURE:1cxx_local_type_template_args
+    Feature record: CXX_FEATURE:1cxx_long_long_type
+    Feature record: CXX_FEATURE:1cxx_noexcept
+    Feature record: CXX_FEATURE:1cxx_nonstatic_member_init
+    Feature record: CXX_FEATURE:1cxx_nullptr
+    Feature record: CXX_FEATURE:1cxx_override
+    Feature record: CXX_FEATURE:1cxx_range_for
+    Feature record: CXX_FEATURE:1cxx_raw_string_literals
+    Feature record: CXX_FEATURE:1cxx_reference_qualified_functions
+    Feature record: CXX_FEATURE:0cxx_relaxed_constexpr
+    Feature record: CXX_FEATURE:0cxx_return_type_deduction
+    Feature record: CXX_FEATURE:1cxx_right_angle_brackets
+    Feature record: CXX_FEATURE:1cxx_rvalue_references
+    Feature record: CXX_FEATURE:1cxx_sizeof_member
+    Feature record: CXX_FEATURE:1cxx_static_assert
+    Feature record: CXX_FEATURE:1cxx_strong_enums
+    Feature record: CXX_FEATURE:1cxx_template_template_parameters
+    Feature record: CXX_FEATURE:1cxx_thread_local
+    Feature record: CXX_FEATURE:1cxx_trailing_return_types
+    Feature record: CXX_FEATURE:1cxx_unicode_literals
+    Feature record: CXX_FEATURE:1cxx_uniform_initialization
+    Feature record: CXX_FEATURE:1cxx_unrestricted_unions
+    Feature record: CXX_FEATURE:1cxx_user_literals
+    Feature record: CXX_FEATURE:0cxx_variable_templates
+    Feature record: CXX_FEATURE:1cxx_variadic_macros
+    Feature record: CXX_FEATURE:1cxx_variadic_templates
+
+
+Detecting CXX [-std=c++98] compiler features compiled with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_90018/fast"
+/usr/bin/make -f CMakeFiles/cmTC_90018.dir/build.make CMakeFiles/cmTC_90018.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o
+/usr/bin/c++     -std=c++98 -o CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx
+Linking CXX executable cmTC_90018
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_90018.dir/link.txt --verbose=1
+/usr/bin/c++        CMakeFiles/cmTC_90018.dir/feature_tests.cxx.o  -o cmTC_90018 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+    Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers
+    Feature record: CXX_FEATURE:0cxx_alias_templates
+    Feature record: CXX_FEATURE:0cxx_alignas
+    Feature record: CXX_FEATURE:0cxx_alignof
+    Feature record: CXX_FEATURE:0cxx_attributes
+    Feature record: CXX_FEATURE:0cxx_attribute_deprecated
+    Feature record: CXX_FEATURE:0cxx_auto_type
+    Feature record: CXX_FEATURE:0cxx_binary_literals
+    Feature record: CXX_FEATURE:0cxx_constexpr
+    Feature record: CXX_FEATURE:0cxx_contextual_conversions
+    Feature record: CXX_FEATURE:0cxx_decltype
+    Feature record: CXX_FEATURE:0cxx_decltype_auto
+    Feature record: CXX_FEATURE:0cxx_decltype_incomplete_return_types
+    Feature record: CXX_FEATURE:0cxx_default_function_template_args
+    Feature record: CXX_FEATURE:0cxx_defaulted_functions
+    Feature record: CXX_FEATURE:0cxx_defaulted_move_initializers
+    Feature record: CXX_FEATURE:0cxx_delegating_constructors
+    Feature record: CXX_FEATURE:0cxx_deleted_functions
+    Feature record: CXX_FEATURE:0cxx_digit_separators
+    Feature record: CXX_FEATURE:0cxx_enum_forward_declarations
+    Feature record: CXX_FEATURE:0cxx_explicit_conversions
+    Feature record: CXX_FEATURE:0cxx_extended_friend_declarations
+    Feature record: CXX_FEATURE:0cxx_extern_templates
+    Feature record: CXX_FEATURE:0cxx_final
+    Feature record: CXX_FEATURE:0cxx_func_identifier
+    Feature record: CXX_FEATURE:0cxx_generalized_initializers
+    Feature record: CXX_FEATURE:0cxx_generic_lambdas
+    Feature record: CXX_FEATURE:0cxx_inheriting_constructors
+    Feature record: CXX_FEATURE:0cxx_inline_namespaces
+    Feature record: CXX_FEATURE:0cxx_lambdas
+    Feature record: CXX_FEATURE:0cxx_lambda_init_captures
+    Feature record: CXX_FEATURE:0cxx_local_type_template_args
+    Feature record: CXX_FEATURE:0cxx_long_long_type
+    Feature record: CXX_FEATURE:0cxx_noexcept
+    Feature record: CXX_FEATURE:0cxx_nonstatic_member_init
+    Feature record: CXX_FEATURE:0cxx_nullptr
+    Feature record: CXX_FEATURE:0cxx_override
+    Feature record: CXX_FEATURE:0cxx_range_for
+    Feature record: CXX_FEATURE:0cxx_raw_string_literals
+    Feature record: CXX_FEATURE:0cxx_reference_qualified_functions
+    Feature record: CXX_FEATURE:0cxx_relaxed_constexpr
+    Feature record: CXX_FEATURE:0cxx_return_type_deduction
+    Feature record: CXX_FEATURE:0cxx_right_angle_brackets
+    Feature record: CXX_FEATURE:0cxx_rvalue_references
+    Feature record: CXX_FEATURE:0cxx_sizeof_member
+    Feature record: CXX_FEATURE:0cxx_static_assert
+    Feature record: CXX_FEATURE:0cxx_strong_enums
+    Feature record: CXX_FEATURE:1cxx_template_template_parameters
+    Feature record: CXX_FEATURE:0cxx_thread_local
+    Feature record: CXX_FEATURE:0cxx_trailing_return_types
+    Feature record: CXX_FEATURE:0cxx_unicode_literals
+    Feature record: CXX_FEATURE:0cxx_uniform_initialization
+    Feature record: CXX_FEATURE:0cxx_unrestricted_unions
+    Feature record: CXX_FEATURE:0cxx_user_literals
+    Feature record: CXX_FEATURE:0cxx_variable_templates
+    Feature record: CXX_FEATURE:0cxx_variadic_macros
+    Feature record: CXX_FEATURE:0cxx_variadic_templates
+Determining if the include file pthread.h exists passed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_4d656/fast"
+/usr/bin/make -f CMakeFiles/cmTC_4d656.dir/build.make CMakeFiles/cmTC_4d656.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o
+/usr/bin/cc    -fPIC    -o CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o   -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp/CheckIncludeFile.c
+Linking C executable cmTC_4d656
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4d656.dir/link.txt --verbose=1
+/usr/bin/cc  -fPIC     CMakeFiles/cmTC_4d656.dir/CheckIncludeFile.c.o  -o cmTC_4d656 -rdynamic 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
+Determining if the function pthread_create exists in the pthread passed with the following output:
+Change Dir: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp
+
+Run Build Command:"/usr/bin/make" "cmTC_aece3/fast"
+/usr/bin/make -f CMakeFiles/cmTC_aece3.dir/build.make CMakeFiles/cmTC_aece3.dir/build
+make[1]: Entering directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o
+/usr/bin/cc    -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create   -o CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o   -c /usr/share/cmake-3.5/Modules/CheckFunctionExists.c
+Linking C executable cmTC_aece3
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_aece3.dir/link.txt --verbose=1
+/usr/bin/cc  -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create    CMakeFiles/cmTC_aece3.dir/CheckFunctionExists.c.o  -o cmTC_aece3 -rdynamic -lpthread 
+make[1]: Leaving directory '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeTmp'
+
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt
new file mode 100644
index 0000000000000000000000000000000000000000..988c14c4be383f9b7bc75c3d6e196b04b99196b8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/CMakeRuleHashes.txt
@@ -0,0 +1,2 @@
+# Hashes of file build rules.
+9de7d50a6da57ec557a0d6ed4f990e8c CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..f8cb2571537c410d2267730f9e2dff7d8fb9890f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile.cmake
@@ -0,0 +1,68 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# The generator used is:
+set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles")
+
+# The top level Makefile was generated from the following files:
+set(CMAKE_MAKEFILE_DEPENDS
+  "CMakeCache.txt"
+  "../CMakeLists.txt"
+  "CMakeFiles/3.5.1/CMakeCCompiler.cmake"
+  "CMakeFiles/3.5.1/CMakeCXXCompiler.cmake"
+  "CMakeFiles/3.5.1/CMakeSystem.cmake"
+  "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend"
+  "/usr/share/cmake-3.5/Modules/CMakeCInformation.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeCXXInformation.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeCommonLanguageInclude.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeGenericSystem.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeLanguageInformation.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeParseArguments.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeSystemSpecificInformation.cmake"
+  "/usr/share/cmake-3.5/Modules/CMakeSystemSpecificInitialize.cmake"
+  "/usr/share/cmake-3.5/Modules/CheckIncludeFile.cmake"
+  "/usr/share/cmake-3.5/Modules/CheckLibraryExists.cmake"
+  "/usr/share/cmake-3.5/Modules/CheckSymbolExists.cmake"
+  "/usr/share/cmake-3.5/Modules/Compiler/GNU-C.cmake"
+  "/usr/share/cmake-3.5/Modules/Compiler/GNU-CXX.cmake"
+  "/usr/share/cmake-3.5/Modules/Compiler/GNU.cmake"
+  "/usr/share/cmake-3.5/Modules/FindCUDA.cmake"
+  "/usr/share/cmake-3.5/Modules/FindCUDA/run_nvcc.cmake"
+  "/usr/share/cmake-3.5/Modules/FindPackageHandleStandardArgs.cmake"
+  "/usr/share/cmake-3.5/Modules/FindPackageMessage.cmake"
+  "/usr/share/cmake-3.5/Modules/FindThreads.cmake"
+  "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU-C.cmake"
+  "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU-CXX.cmake"
+  "/usr/share/cmake-3.5/Modules/Platform/Linux-GNU.cmake"
+  "/usr/share/cmake-3.5/Modules/Platform/Linux.cmake"
+  "/usr/share/cmake-3.5/Modules/Platform/UnixPaths.cmake"
+  )
+
+# The corresponding makefile is:
+set(CMAKE_MAKEFILE_OUTPUTS
+  "Makefile"
+  "CMakeFiles/cmake.check_cache"
+  )
+
+# Byproducts of CMake generate step:
+set(CMAKE_MAKEFILE_PRODUCTS
+  "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake"
+  "CMakeFiles/CMakeDirectoryInformation.cmake"
+  )
+
+# Dependency information for all targets:
+set(CMAKE_DEPEND_INFO_FILES
+  "CMakeFiles/lenet_keras_half.dir/DependInfo.cmake"
+  "CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake"
+  "CMakeFiles/fc4_half.dir/DependInfo.cmake"
+  "CMakeFiles/fc3_clipped.dir/DependInfo.cmake"
+  "CMakeFiles/fc2_clipped.dir/DependInfo.cmake"
+  "CMakeFiles/test_ops.dir/DependInfo.cmake"
+  "CMakeFiles/fc3_half.dir/DependInfo.cmake"
+  "CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  "CMakeFiles/fc4_clipped.dir/DependInfo.cmake"
+  "CMakeFiles/cifar_keras.dir/DependInfo.cmake"
+  "CMakeFiles/lenet_keras.dir/DependInfo.cmake"
+  "CMakeFiles/lenet_tanh.dir/DependInfo.cmake"
+  "CMakeFiles/fc2_half.dir/DependInfo.cmake"
+  )
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2 b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2
new file mode 100644
index 0000000000000000000000000000000000000000..0b344bc3c2932e509eae114809b8b641c1aeb1c5
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/Makefile2
@@ -0,0 +1,552 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Default target executed when no arguments are given to make.
+default_target: all
+
+.PHONY : default_target
+
+# The main recursive all target
+all:
+
+.PHONY : all
+
+# The main recursive preinstall target
+preinstall:
+
+.PHONY : preinstall
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+#=============================================================================
+# Target rules for target CMakeFiles/lenet_keras_half.dir
+
+# All Build rule for target.
+CMakeFiles/lenet_keras_half.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/depend
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=17,18 "Built target lenet_keras_half"
+.PHONY : CMakeFiles/lenet_keras_half.dir/all
+
+# Include target in all.
+all: CMakeFiles/lenet_keras_half.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/lenet_keras_half.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_keras_half.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/lenet_keras_half.dir/rule
+
+# Convenience name for target.
+lenet_keras_half: CMakeFiles/lenet_keras_half.dir/rule
+
+.PHONY : lenet_keras_half
+
+# clean rule for target.
+CMakeFiles/lenet_keras_half.dir/clean:
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/clean
+.PHONY : CMakeFiles/lenet_keras_half.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/lenet_keras_half.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/lenet_tanh_half.dir
+
+# All Build rule for target.
+CMakeFiles/lenet_tanh_half.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/depend
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=21,22 "Built target lenet_tanh_half"
+.PHONY : CMakeFiles/lenet_tanh_half.dir/all
+
+# Include target in all.
+all: CMakeFiles/lenet_tanh_half.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/lenet_tanh_half.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_tanh_half.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/lenet_tanh_half.dir/rule
+
+# Convenience name for target.
+lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/rule
+
+.PHONY : lenet_tanh_half
+
+# clean rule for target.
+CMakeFiles/lenet_tanh_half.dir/clean:
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/clean
+.PHONY : CMakeFiles/lenet_tanh_half.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/lenet_tanh_half.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc4_half.dir
+
+# All Build rule for target.
+CMakeFiles/fc4_half.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/depend
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=13,14 "Built target fc4_half"
+.PHONY : CMakeFiles/fc4_half.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc4_half.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc4_half.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc4_half.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc4_half.dir/rule
+
+# Convenience name for target.
+fc4_half: CMakeFiles/fc4_half.dir/rule
+
+.PHONY : fc4_half
+
+# clean rule for target.
+CMakeFiles/fc4_half.dir/clean:
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/clean
+.PHONY : CMakeFiles/fc4_half.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc4_half.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc3_clipped.dir
+
+# All Build rule for target.
+CMakeFiles/fc3_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/depend
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=7,8 "Built target fc3_clipped"
+.PHONY : CMakeFiles/fc3_clipped.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc3_clipped.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc3_clipped.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc3_clipped.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc3_clipped.dir/rule
+
+# Convenience name for target.
+fc3_clipped: CMakeFiles/fc3_clipped.dir/rule
+
+.PHONY : fc3_clipped
+
+# clean rule for target.
+CMakeFiles/fc3_clipped.dir/clean:
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/clean
+.PHONY : CMakeFiles/fc3_clipped.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc3_clipped.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc2_clipped.dir
+
+# All Build rule for target.
+CMakeFiles/fc2_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/depend
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=3,4 "Built target fc2_clipped"
+.PHONY : CMakeFiles/fc2_clipped.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc2_clipped.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc2_clipped.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc2_clipped.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc2_clipped.dir/rule
+
+# Convenience name for target.
+fc2_clipped: CMakeFiles/fc2_clipped.dir/rule
+
+.PHONY : fc2_clipped
+
+# clean rule for target.
+CMakeFiles/fc2_clipped.dir/clean:
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/clean
+.PHONY : CMakeFiles/fc2_clipped.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc2_clipped.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/test_ops.dir
+
+# All Build rule for target.
+CMakeFiles/test_ops.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/depend
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=25,26 "Built target test_ops"
+.PHONY : CMakeFiles/test_ops.dir/all
+
+# Include target in all.
+all: CMakeFiles/test_ops.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/test_ops.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/test_ops.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/test_ops.dir/rule
+
+# Convenience name for target.
+test_ops: CMakeFiles/test_ops.dir/rule
+
+.PHONY : test_ops
+
+# clean rule for target.
+CMakeFiles/test_ops.dir/clean:
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/clean
+.PHONY : CMakeFiles/test_ops.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/test_ops.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc3_half.dir
+
+# All Build rule for target.
+CMakeFiles/fc3_half.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/depend
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=9,10 "Built target fc3_half"
+.PHONY : CMakeFiles/fc3_half.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc3_half.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc3_half.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc3_half.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc3_half.dir/rule
+
+# Convenience name for target.
+fc3_half: CMakeFiles/fc3_half.dir/rule
+
+.PHONY : fc3_half
+
+# clean rule for target.
+CMakeFiles/fc3_half.dir/clean:
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/clean
+.PHONY : CMakeFiles/fc3_half.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc3_half.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/tensor_runtime.dir
+
+# All Build rule for target.
+CMakeFiles/tensor_runtime.dir/all:
+	$(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/depend
+	$(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=23,24 "Built target tensor_runtime"
+.PHONY : CMakeFiles/tensor_runtime.dir/all
+
+# Include target in all.
+all: CMakeFiles/tensor_runtime.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/tensor_runtime.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 2
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/tensor_runtime.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/tensor_runtime.dir/rule
+
+# Convenience name for target.
+tensor_runtime: CMakeFiles/tensor_runtime.dir/rule
+
+.PHONY : tensor_runtime
+
+# clean rule for target.
+CMakeFiles/tensor_runtime.dir/clean:
+	$(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/clean
+.PHONY : CMakeFiles/tensor_runtime.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/tensor_runtime.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc4_clipped.dir
+
+# All Build rule for target.
+CMakeFiles/fc4_clipped.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/depend
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=11,12 "Built target fc4_clipped"
+.PHONY : CMakeFiles/fc4_clipped.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc4_clipped.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc4_clipped.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc4_clipped.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc4_clipped.dir/rule
+
+# Convenience name for target.
+fc4_clipped: CMakeFiles/fc4_clipped.dir/rule
+
+.PHONY : fc4_clipped
+
+# clean rule for target.
+CMakeFiles/fc4_clipped.dir/clean:
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/clean
+.PHONY : CMakeFiles/fc4_clipped.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc4_clipped.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/cifar_keras.dir
+
+# All Build rule for target.
+CMakeFiles/cifar_keras.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/depend
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=1,2 "Built target cifar_keras"
+.PHONY : CMakeFiles/cifar_keras.dir/all
+
+# Include target in all.
+all: CMakeFiles/cifar_keras.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/cifar_keras.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/cifar_keras.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/cifar_keras.dir/rule
+
+# Convenience name for target.
+cifar_keras: CMakeFiles/cifar_keras.dir/rule
+
+.PHONY : cifar_keras
+
+# clean rule for target.
+CMakeFiles/cifar_keras.dir/clean:
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/clean
+.PHONY : CMakeFiles/cifar_keras.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/cifar_keras.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/lenet_keras.dir
+
+# All Build rule for target.
+CMakeFiles/lenet_keras.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/depend
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=15,16 "Built target lenet_keras"
+.PHONY : CMakeFiles/lenet_keras.dir/all
+
+# Include target in all.
+all: CMakeFiles/lenet_keras.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/lenet_keras.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_keras.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/lenet_keras.dir/rule
+
+# Convenience name for target.
+lenet_keras: CMakeFiles/lenet_keras.dir/rule
+
+.PHONY : lenet_keras
+
+# clean rule for target.
+CMakeFiles/lenet_keras.dir/clean:
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/clean
+.PHONY : CMakeFiles/lenet_keras.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/lenet_keras.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/lenet_tanh.dir
+
+# All Build rule for target.
+CMakeFiles/lenet_tanh.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/depend
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=19,20 "Built target lenet_tanh"
+.PHONY : CMakeFiles/lenet_tanh.dir/all
+
+# Include target in all.
+all: CMakeFiles/lenet_tanh.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/lenet_tanh.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/lenet_tanh.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/lenet_tanh.dir/rule
+
+# Convenience name for target.
+lenet_tanh: CMakeFiles/lenet_tanh.dir/rule
+
+.PHONY : lenet_tanh
+
+# clean rule for target.
+CMakeFiles/lenet_tanh.dir/clean:
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/clean
+.PHONY : CMakeFiles/lenet_tanh.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/lenet_tanh.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/fc2_half.dir
+
+# All Build rule for target.
+CMakeFiles/fc2_half.dir/all: CMakeFiles/tensor_runtime.dir/all
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/depend
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=5,6 "Built target fc2_half"
+.PHONY : CMakeFiles/fc2_half.dir/all
+
+# Include target in all.
+all: CMakeFiles/fc2_half.dir/all
+
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/fc2_half.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 4
+	$(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/fc2_half.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : CMakeFiles/fc2_half.dir/rule
+
+# Convenience name for target.
+fc2_half: CMakeFiles/fc2_half.dir/rule
+
+.PHONY : fc2_half
+
+# clean rule for target.
+CMakeFiles/fc2_half.dir/clean:
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/clean
+.PHONY : CMakeFiles/fc2_half.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/fc2_half.dir/clean
+
+.PHONY : clean
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+	$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7aa182268196538906366b5fc5667cd3cb92bc7f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/TargetDirectories.txt
@@ -0,0 +1,15 @@
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/edit_cache.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/rebuild_cache.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..78a78817e8c337630492f9d074f99130d5e5b442
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/CXX.includecache
@@ -0,0 +1,610 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..4b42d430b83dd89ba4b4356776e1dca9a75d3d6e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..2e4378ebfbc17bb851037c2b1eb2ca03b3955b57
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/cifar_keras.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/cifar_keras.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/cifar_keras.dir/flags.make
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: CMakeFiles/cifar_keras.dir/flags.make
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/src/cifar_keras.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc > CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc -o CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires:
+
+.PHONY : CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides.build
+.PHONY : CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.provides.build: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o
+
+
+# Object files for target cifar_keras
+cifar_keras_OBJECTS = \
+"CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o"
+
+# External object files for target cifar_keras
+cifar_keras_EXTERNAL_OBJECTS =
+
+cifar_keras: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o
+cifar_keras: CMakeFiles/cifar_keras.dir/build.make
+cifar_keras: libtensor_runtime.a
+cifar_keras: /software/cuda-9.1/lib64/libcudart_static.a
+cifar_keras: /usr/lib/x86_64-linux-gnu/librt.so
+cifar_keras: /software/cuda-9.1/lib64/libcublas.so
+cifar_keras: CMakeFiles/cifar_keras.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable cifar_keras"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/cifar_keras.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/cifar_keras.dir/build: cifar_keras
+
+.PHONY : CMakeFiles/cifar_keras.dir/build
+
+CMakeFiles/cifar_keras.dir/requires: CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o.requires
+
+.PHONY : CMakeFiles/cifar_keras.dir/requires
+
+CMakeFiles/cifar_keras.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/cifar_keras.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/cifar_keras.dir/clean
+
+CMakeFiles/cifar_keras.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/cifar_keras.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..335c9fb2bc422090fd58f558df55be4ff0e2292d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o"
+  "cifar_keras.pdb"
+  "cifar_keras"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/cifar_keras.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..a83f9ca39892005e9714e9baa2535debb2237046
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..c69b29f90158618bf7eb4d5afc2d16b3e4265644
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../dnn_sources/src/cifar_keras.cc
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..95dc6ca79382618bd80eeb65c600f9daa166ec63
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o  -o cifar_keras  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..abadeb0c3abaa81d622026fcd3ae096d03dd29b7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cifar_keras.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 1
+CMAKE_PROGRESS_2 = 2
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache
new file mode 100644
index 0000000000000000000000000000000000000000..3dccd731726d7faa8b29d8d7dba3b981a53ca497
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/cmake.check_cache
@@ -0,0 +1 @@
+# This file is generated by cmake for dependency checking of the CMakeCache.txt file
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..8acab7ba46b391f5dc7af10b96f417aebfd080f4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..290629a09dcc44628fd8bfbb815ea84749126e12
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..5e0004f1cb85f419ab45ee449afb52e309e621cb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc2_clipped.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc2_clipped.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc2_clipped.dir/flags.make
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: CMakeFiles/fc2_clipped.dir/flags.make
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/src/fc2_clipped.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc > CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc -o CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires:
+
+.PHONY : CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides.build
+.PHONY : CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.provides.build: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o
+
+
+# Object files for target fc2_clipped
+fc2_clipped_OBJECTS = \
+"CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o"
+
+# External object files for target fc2_clipped
+fc2_clipped_EXTERNAL_OBJECTS =
+
+fc2_clipped: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o
+fc2_clipped: CMakeFiles/fc2_clipped.dir/build.make
+fc2_clipped: libtensor_runtime.a
+fc2_clipped: /software/cuda-9.1/lib64/libcudart_static.a
+fc2_clipped: /usr/lib/x86_64-linux-gnu/librt.so
+fc2_clipped: /software/cuda-9.1/lib64/libcublas.so
+fc2_clipped: CMakeFiles/fc2_clipped.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc2_clipped"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc2_clipped.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc2_clipped.dir/build: fc2_clipped
+
+.PHONY : CMakeFiles/fc2_clipped.dir/build
+
+CMakeFiles/fc2_clipped.dir/requires: CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o.requires
+
+.PHONY : CMakeFiles/fc2_clipped.dir/requires
+
+CMakeFiles/fc2_clipped.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc2_clipped.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc2_clipped.dir/clean
+
+CMakeFiles/fc2_clipped.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc2_clipped.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..0140a9a42193824cd671307074eed19164b868ea
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o"
+  "fc2_clipped.pdb"
+  "fc2_clipped"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc2_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..ccedd50d251316aeca0ad00cd47dc5c7c98ae5cc
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..3fd2cbb0b5d8936cc61934011ea513010a21ceea
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../dnn_sources/src/fc2_clipped.cc
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7ca553fcc5572e41ec679ffa141b786f8b066f61
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o  -o fc2_clipped  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..8c8fb6fbbc138d8387b9ed9bdb2088ee8aa036f6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_clipped.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 3
+CMAKE_PROGRESS_2 = 4
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..2ad6351efadfcce11eb2338cf4e5e6dd68c3e9c4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..fc3896454326f0206ad33fa86b2df38571a21ba6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..53ce420327310bca8328607c4c687f45f408840a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc2_half.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc2_half.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc2_half.dir/flags.make
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: CMakeFiles/fc2_half.dir/flags.make
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/src/half/fc2_half.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc > CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc -o CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires:
+
+.PHONY : CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides.build
+.PHONY : CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.provides.build: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o
+
+
+# Object files for target fc2_half
+fc2_half_OBJECTS = \
+"CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o"
+
+# External object files for target fc2_half
+fc2_half_EXTERNAL_OBJECTS =
+
+fc2_half: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o
+fc2_half: CMakeFiles/fc2_half.dir/build.make
+fc2_half: libtensor_runtime.a
+fc2_half: /software/cuda-9.1/lib64/libcudart_static.a
+fc2_half: /usr/lib/x86_64-linux-gnu/librt.so
+fc2_half: /software/cuda-9.1/lib64/libcublas.so
+fc2_half: CMakeFiles/fc2_half.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc2_half"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc2_half.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc2_half.dir/build: fc2_half
+
+.PHONY : CMakeFiles/fc2_half.dir/build
+
+CMakeFiles/fc2_half.dir/requires: CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o.requires
+
+.PHONY : CMakeFiles/fc2_half.dir/requires
+
+CMakeFiles/fc2_half.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc2_half.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc2_half.dir/clean
+
+CMakeFiles/fc2_half.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc2_half.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e15f368c9c73447a9ff859216be3579b8c6a9f98
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o"
+  "fc2_half.pdb"
+  "fc2_half"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc2_half.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..63d11755b775dbef7f1245dd9a92ad5472531cbd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..622ac63922f473f23429e608be0a62e3681c8abc
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../dnn_sources/src/half/fc2_half.cc
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..533b66238e91aa134146a53bc1452498c9a383e1
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o  -o fc2_half  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..3a86673aa7c1868ad77aa16c631effd83be0da02
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc2_half.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 5
+CMAKE_PROGRESS_2 = 6
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..2ee46f5d1f428a09733312af4fa825a1d5cd40bc
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e1e4dd53ee209b075c9e378f3b9bc5f66f7b84a4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..372d86fc4b40de8fb2c6940a06dbfa77de6e0cb3
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc3_clipped.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc3_clipped.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc3_clipped.dir/flags.make
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: CMakeFiles/fc3_clipped.dir/flags.make
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/src/fc3_clipped.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc > CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc -o CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires:
+
+.PHONY : CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides.build
+.PHONY : CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.provides.build: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o
+
+
+# Object files for target fc3_clipped
+fc3_clipped_OBJECTS = \
+"CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o"
+
+# External object files for target fc3_clipped
+fc3_clipped_EXTERNAL_OBJECTS =
+
+fc3_clipped: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o
+fc3_clipped: CMakeFiles/fc3_clipped.dir/build.make
+fc3_clipped: libtensor_runtime.a
+fc3_clipped: /software/cuda-9.1/lib64/libcudart_static.a
+fc3_clipped: /usr/lib/x86_64-linux-gnu/librt.so
+fc3_clipped: /software/cuda-9.1/lib64/libcublas.so
+fc3_clipped: CMakeFiles/fc3_clipped.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc3_clipped"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc3_clipped.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc3_clipped.dir/build: fc3_clipped
+
+.PHONY : CMakeFiles/fc3_clipped.dir/build
+
+CMakeFiles/fc3_clipped.dir/requires: CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o.requires
+
+.PHONY : CMakeFiles/fc3_clipped.dir/requires
+
+CMakeFiles/fc3_clipped.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc3_clipped.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc3_clipped.dir/clean
+
+CMakeFiles/fc3_clipped.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc3_clipped.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c10c79fc608515b6cf396999d266ac0b3c40df26
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o"
+  "fc3_clipped.pdb"
+  "fc3_clipped"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc3_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..a132ed403d0cfacf109e55775a1e709ac8828c4e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..33de24568667c0f88d3d89d9b695fab3d9dea392
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../dnn_sources/src/fc3_clipped.cc
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..34dd27fdc47e7fa2268711dcf46ae7a35cd85036
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o  -o fc3_clipped  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..72bb7dd025afc5824222cbd3a1e64841afc2792c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_clipped.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 7
+CMAKE_PROGRESS_2 = 8
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..7a9a368338463bf553ce05065113b374a2f46d48
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ef40691589e0ec87c0c22644f5067e71aaab39a0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..0d90cb857ca1fb3396452948fe968740703c0ec2
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc3_half.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc3_half.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc3_half.dir/flags.make
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: CMakeFiles/fc3_half.dir/flags.make
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/src/half/fc3_half.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc > CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc -o CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires:
+
+.PHONY : CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides.build
+.PHONY : CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.provides.build: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o
+
+
+# Object files for target fc3_half
+fc3_half_OBJECTS = \
+"CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o"
+
+# External object files for target fc3_half
+fc3_half_EXTERNAL_OBJECTS =
+
+fc3_half: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o
+fc3_half: CMakeFiles/fc3_half.dir/build.make
+fc3_half: libtensor_runtime.a
+fc3_half: /software/cuda-9.1/lib64/libcudart_static.a
+fc3_half: /usr/lib/x86_64-linux-gnu/librt.so
+fc3_half: /software/cuda-9.1/lib64/libcublas.so
+fc3_half: CMakeFiles/fc3_half.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc3_half"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc3_half.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc3_half.dir/build: fc3_half
+
+.PHONY : CMakeFiles/fc3_half.dir/build
+
+CMakeFiles/fc3_half.dir/requires: CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o.requires
+
+.PHONY : CMakeFiles/fc3_half.dir/requires
+
+CMakeFiles/fc3_half.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc3_half.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc3_half.dir/clean
+
+CMakeFiles/fc3_half.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc3_half.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..75f16f0d1fa72bf2fbc24aad3d50a670cb021c75
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o"
+  "fc3_half.pdb"
+  "fc3_half"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc3_half.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..a5d2a293e7c97aceec9ba6bc45bc3ee86134af41
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..ca2c2eb82316f95d1114b69d950002a4bdc02ef6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../dnn_sources/src/half/fc3_half.cc
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d71d52f75e878f64d1c257720b3266e17d1f1334
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o  -o fc3_half  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..b700c2c902219d74619014853aade0d7ec177030
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc3_half.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 9
+CMAKE_PROGRESS_2 = 10
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..7992fbb40f70cb903029f05b75e0bf882ac7fa3d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..6c82d46875329badd24c671d5698a3c366073b44
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..cede9a32472e36ebb8373dee6826c507f3e8821a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc4_clipped.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc4_clipped.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc4_clipped.dir/flags.make
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: CMakeFiles/fc4_clipped.dir/flags.make
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/src/fc4_clipped.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc > CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc -o CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires:
+
+.PHONY : CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides.build
+.PHONY : CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.provides.build: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o
+
+
+# Object files for target fc4_clipped
+fc4_clipped_OBJECTS = \
+"CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o"
+
+# External object files for target fc4_clipped
+fc4_clipped_EXTERNAL_OBJECTS =
+
+fc4_clipped: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o
+fc4_clipped: CMakeFiles/fc4_clipped.dir/build.make
+fc4_clipped: libtensor_runtime.a
+fc4_clipped: /software/cuda-9.1/lib64/libcudart_static.a
+fc4_clipped: /usr/lib/x86_64-linux-gnu/librt.so
+fc4_clipped: /software/cuda-9.1/lib64/libcublas.so
+fc4_clipped: CMakeFiles/fc4_clipped.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc4_clipped"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc4_clipped.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc4_clipped.dir/build: fc4_clipped
+
+.PHONY : CMakeFiles/fc4_clipped.dir/build
+
+CMakeFiles/fc4_clipped.dir/requires: CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o.requires
+
+.PHONY : CMakeFiles/fc4_clipped.dir/requires
+
+CMakeFiles/fc4_clipped.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc4_clipped.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc4_clipped.dir/clean
+
+CMakeFiles/fc4_clipped.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc4_clipped.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..8666a8e8cb28664c8bc811043d55de5f02507a4d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o"
+  "fc4_clipped.pdb"
+  "fc4_clipped"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc4_clipped.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..2c11361bc9c37ce3054af9fabe256bbcfadf998a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..6ca5e15a2cec9d3b3f5d77a99255f4ced3910340
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../dnn_sources/src/fc4_clipped.cc
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a705566a567718c9ed1ba2ccb7368f5c980663f7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o  -o fc4_clipped  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..596289c0fd56aa23623cdcbea086a133035b3bc8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_clipped.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 11
+CMAKE_PROGRESS_2 = 12
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..433a511749074e96972c1e6985f9b0dc8b9365e1
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..edbcd0fead66f2c20df42fbc7507ab4e3fa495e7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..63a3f0b76f125f34757ee9e9737dea769b0be782
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/fc4_half.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/fc4_half.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/fc4_half.dir/flags.make
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: CMakeFiles/fc4_half.dir/flags.make
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/src/half/fc4_half.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc > CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc -o CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires:
+
+.PHONY : CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides.build
+.PHONY : CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.provides.build: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o
+
+
+# Object files for target fc4_half
+fc4_half_OBJECTS = \
+"CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o"
+
+# External object files for target fc4_half
+fc4_half_EXTERNAL_OBJECTS =
+
+fc4_half: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o
+fc4_half: CMakeFiles/fc4_half.dir/build.make
+fc4_half: libtensor_runtime.a
+fc4_half: /software/cuda-9.1/lib64/libcudart_static.a
+fc4_half: /usr/lib/x86_64-linux-gnu/librt.so
+fc4_half: /software/cuda-9.1/lib64/libcublas.so
+fc4_half: CMakeFiles/fc4_half.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable fc4_half"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/fc4_half.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/fc4_half.dir/build: fc4_half
+
+.PHONY : CMakeFiles/fc4_half.dir/build
+
+CMakeFiles/fc4_half.dir/requires: CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o.requires
+
+.PHONY : CMakeFiles/fc4_half.dir/requires
+
+CMakeFiles/fc4_half.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/fc4_half.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/fc4_half.dir/clean
+
+CMakeFiles/fc4_half.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/fc4_half.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..3d5fa79831540e884599717684998178268aeca2
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o"
+  "fc4_half.pdb"
+  "fc4_half"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/fc4_half.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..150e2b93982ed4781675774d28c09970064e6b1a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..3584a546c0aeab001413df6299b417b58c3423cd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../dnn_sources/src/half/fc4_half.cc
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8738e81d5ca4717506e51748de1a06e1bdf1c033
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o  -o fc4_half  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..d92f75a2935ea31751e4e3d62297a6a1c131fb4d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/fc4_half.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 13
+CMAKE_PROGRESS_2 = 14
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin
new file mode 100755
index 0000000000000000000000000000000000000000..6c440e3f3fecefb9cee9c3151cbf31e26cbe7575
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c
new file mode 100644
index 0000000000000000000000000000000000000000..6590dded2342f3eebd9b81505327e84a488580e6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.c
@@ -0,0 +1,34 @@
+
+  const char features[] = {"\n"
+"C_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404
+"1"
+#else
+"0"
+#endif
+"c_function_prototypes\n"
+"C_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+"1"
+#else
+"0"
+#endif
+"c_restrict\n"
+"C_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201000L
+"1"
+#else
+"0"
+#endif
+"c_static_assert\n"
+"C_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+"1"
+#else
+"0"
+#endif
+"c_variadic_macros\n"
+
+};
+
+int main(int argc, char** argv) { (void)argv; return features[argc]; }
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..b93418c6ed69feaf1b5c2feb9592bbdb5a5f042c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/feature_tests.cxx
@@ -0,0 +1,405 @@
+
+  const char features[] = {"\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L
+"1"
+#else
+"0"
+#endif
+"cxx_aggregate_default_initializers\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_alias_templates\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_alignas\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_alignof\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_attributes\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_attribute_deprecated\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_auto_type\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_binary_literals\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_constexpr\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_contextual_conversions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_decltype\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_decltype_auto\n"
+"CXX_FEATURE:"
+#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_decltype_incomplete_return_types\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_default_function_template_args\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_defaulted_functions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_defaulted_move_initializers\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_delegating_constructors\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_deleted_functions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_digit_separators\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_enum_forward_declarations\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_explicit_conversions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_extended_friend_declarations\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_extern_templates\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_final\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_func_identifier\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_generalized_initializers\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_generic_lambdas\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_inheriting_constructors\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_inline_namespaces\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_lambdas\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_lambda_init_captures\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_local_type_template_args\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_long_long_type\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_noexcept\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_nonstatic_member_init\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_nullptr\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_override\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_range_for\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_raw_string_literals\n"
+"CXX_FEATURE:"
+#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_reference_qualified_functions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L
+"1"
+#else
+"0"
+#endif
+"cxx_relaxed_constexpr\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_return_type_deduction\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_right_angle_brackets\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_rvalue_references\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_sizeof_member\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_static_assert\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_strong_enums\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && __cplusplus
+"1"
+#else
+"0"
+#endif
+"cxx_template_template_parameters\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_thread_local\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_trailing_return_types\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_unicode_literals\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_uniform_initialization\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_unrestricted_unions\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L
+"1"
+#else
+"0"
+#endif
+"cxx_user_literals\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L
+"1"
+#else
+"0"
+#endif
+"cxx_variable_templates\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_variadic_macros\n"
+"CXX_FEATURE:"
+#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__))
+"1"
+#else
+"0"
+#endif
+"cxx_variadic_templates\n"
+
+};
+
+int main(int argc, char** argv) { (void)argv; return features[argc]; }
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..7b1682a10ad5f3207e7f8e392d79e53e7f78d7d0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/CXX.includecache
@@ -0,0 +1,610 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..6c61fef38fa9a233cd858f66f49f624b17f9519c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..0cec553b8789fe646c1ec825842e45410f44ea7c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/lenet_keras.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/lenet_keras.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/lenet_keras.dir/flags.make
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: CMakeFiles/lenet_keras.dir/flags.make
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/src/lenet_keras.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc > CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc -o CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires:
+
+.PHONY : CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides.build
+.PHONY : CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.provides.build: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o
+
+
+# Object files for target lenet_keras
+lenet_keras_OBJECTS = \
+"CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o"
+
+# External object files for target lenet_keras
+lenet_keras_EXTERNAL_OBJECTS =
+
+lenet_keras: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o
+lenet_keras: CMakeFiles/lenet_keras.dir/build.make
+lenet_keras: libtensor_runtime.a
+lenet_keras: /software/cuda-9.1/lib64/libcudart_static.a
+lenet_keras: /usr/lib/x86_64-linux-gnu/librt.so
+lenet_keras: /software/cuda-9.1/lib64/libcublas.so
+lenet_keras: CMakeFiles/lenet_keras.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_keras"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_keras.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/lenet_keras.dir/build: lenet_keras
+
+.PHONY : CMakeFiles/lenet_keras.dir/build
+
+CMakeFiles/lenet_keras.dir/requires: CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o.requires
+
+.PHONY : CMakeFiles/lenet_keras.dir/requires
+
+CMakeFiles/lenet_keras.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/lenet_keras.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/lenet_keras.dir/clean
+
+CMakeFiles/lenet_keras.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/lenet_keras.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..dbf02b7144e460c621ec6fe1215b3cbe2d49d427
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o"
+  "lenet_keras.pdb"
+  "lenet_keras"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/lenet_keras.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..ea2a7799d8be85ef1fdde83046f5b3db84550822
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..933879b479118d71c7b3b2e3bd2dc1d1d4594f5b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../dnn_sources/src/lenet_keras.cc
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b9e1e839dd5a8a40521da465cb989b3033cc4678
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o  -o lenet_keras  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..a35c33b98d59108c8111c49cbe919dcc1205b601
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 15
+CMAKE_PROGRESS_2 = 16
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..c9299a69419463ebab3ce01de5794334a0e58e06
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/CXX.includecache
@@ -0,0 +1,610 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..2130e8a8dc6efdf2d4a62716de3e5a34a8ac999a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..47c160d10423c81d89aec417717f4ce06482ce77
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/lenet_keras_half.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/lenet_keras_half.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/lenet_keras_half.dir/flags.make
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: CMakeFiles/lenet_keras_half.dir/flags.make
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/src/half/lenet_keras_half.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc > CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc -o CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires:
+
+.PHONY : CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides.build
+.PHONY : CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.provides.build: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o
+
+
+# Object files for target lenet_keras_half
+lenet_keras_half_OBJECTS = \
+"CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o"
+
+# External object files for target lenet_keras_half
+lenet_keras_half_EXTERNAL_OBJECTS =
+
+lenet_keras_half: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o
+lenet_keras_half: CMakeFiles/lenet_keras_half.dir/build.make
+lenet_keras_half: libtensor_runtime.a
+lenet_keras_half: /software/cuda-9.1/lib64/libcudart_static.a
+lenet_keras_half: /usr/lib/x86_64-linux-gnu/librt.so
+lenet_keras_half: /software/cuda-9.1/lib64/libcublas.so
+lenet_keras_half: CMakeFiles/lenet_keras_half.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_keras_half"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_keras_half.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/lenet_keras_half.dir/build: lenet_keras_half
+
+.PHONY : CMakeFiles/lenet_keras_half.dir/build
+
+CMakeFiles/lenet_keras_half.dir/requires: CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o.requires
+
+.PHONY : CMakeFiles/lenet_keras_half.dir/requires
+
+CMakeFiles/lenet_keras_half.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/lenet_keras_half.dir/clean
+
+CMakeFiles/lenet_keras_half.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/lenet_keras_half.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..cbc020c708683fc107968ba8e2f6ab25474f8677
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o"
+  "lenet_keras_half.pdb"
+  "lenet_keras_half"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/lenet_keras_half.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..a5c2ac445903e24e66c542c02888a6ecd34637e4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..1332a21846db60cdff5b441cf8fd85f9be1ddadf
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../dnn_sources/src/half/lenet_keras_half.cc
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45906ee60f9080f3b3e4e4c93c4c358428d6fcf9
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o  -o lenet_keras_half  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..5a7451db601a4e0b85fc5c33a9eb78c105375e1a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_keras_half.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 17
+CMAKE_PROGRESS_2 = 18
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..76459ed35867fdd839c190e488d01189aabb99c0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/CXX.includecache
@@ -0,0 +1,610 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..2fb9d9dbac8efa5bb6ee705c20a86cd7c4df7f78
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..b63a7f4e90100d741c3a43e777aae900f5195459
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/lenet_tanh.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/lenet_tanh.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/lenet_tanh.dir/flags.make
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: CMakeFiles/lenet_tanh.dir/flags.make
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/src/lenet2_tanh.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc > CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc -o CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires:
+
+.PHONY : CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides.build
+.PHONY : CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.provides.build: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o
+
+
+# Object files for target lenet_tanh
+lenet_tanh_OBJECTS = \
+"CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o"
+
+# External object files for target lenet_tanh
+lenet_tanh_EXTERNAL_OBJECTS =
+
+lenet_tanh: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o
+lenet_tanh: CMakeFiles/lenet_tanh.dir/build.make
+lenet_tanh: libtensor_runtime.a
+lenet_tanh: /software/cuda-9.1/lib64/libcudart_static.a
+lenet_tanh: /usr/lib/x86_64-linux-gnu/librt.so
+lenet_tanh: /software/cuda-9.1/lib64/libcublas.so
+lenet_tanh: CMakeFiles/lenet_tanh.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_tanh"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_tanh.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/lenet_tanh.dir/build: lenet_tanh
+
+.PHONY : CMakeFiles/lenet_tanh.dir/build
+
+CMakeFiles/lenet_tanh.dir/requires: CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o.requires
+
+.PHONY : CMakeFiles/lenet_tanh.dir/requires
+
+CMakeFiles/lenet_tanh.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/lenet_tanh.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/lenet_tanh.dir/clean
+
+CMakeFiles/lenet_tanh.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/lenet_tanh.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..3d74e65e4e66864ade9b49f5d3871b6ba2a56e4b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o"
+  "lenet_tanh.pdb"
+  "lenet_tanh"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/lenet_tanh.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..fa863a217eb1530606b1efc1465d6050895a91fc
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..fea2b140184a4956e7c69e01ddb2d0e7d030f406
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../dnn_sources/src/lenet2_tanh.cc
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dabfee65c443e3a0947c391e2473f83382b0ba9c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o  -o lenet_tanh  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..48b3d8a54961adb9cc4e043cbf8aaaf7484fc44a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 19
+CMAKE_PROGRESS_2 = 20
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..cf64a9de7d16584dbd1e81f31a1f0269a61e013b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/CXX.includecache
@@ -0,0 +1,612 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+fcntl.h
+-
+sys/types.h
+-
+sys/stat.h
+-
+string.h
+-
+../../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+../../include/types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c536e7042bb98ab6b384bc7d7008138d0a17257a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..6ffdff84dd27efacb392adca22206d652d4fd24f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/lenet_tanh_half.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/lenet_tanh_half.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/lenet_tanh_half.dir/flags.make
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: CMakeFiles/lenet_tanh_half.dir/flags.make
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/src/half/lenet_tanh_half.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc > CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc -o CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires:
+
+.PHONY : CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides.build
+.PHONY : CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.provides.build: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o
+
+
+# Object files for target lenet_tanh_half
+lenet_tanh_half_OBJECTS = \
+"CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o"
+
+# External object files for target lenet_tanh_half
+lenet_tanh_half_EXTERNAL_OBJECTS =
+
+lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o
+lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/build.make
+lenet_tanh_half: libtensor_runtime.a
+lenet_tanh_half: /software/cuda-9.1/lib64/libcudart_static.a
+lenet_tanh_half: /usr/lib/x86_64-linux-gnu/librt.so
+lenet_tanh_half: /software/cuda-9.1/lib64/libcublas.so
+lenet_tanh_half: CMakeFiles/lenet_tanh_half.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable lenet_tanh_half"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/lenet_tanh_half.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/lenet_tanh_half.dir/build: lenet_tanh_half
+
+.PHONY : CMakeFiles/lenet_tanh_half.dir/build
+
+CMakeFiles/lenet_tanh_half.dir/requires: CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o.requires
+
+.PHONY : CMakeFiles/lenet_tanh_half.dir/requires
+
+CMakeFiles/lenet_tanh_half.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/lenet_tanh_half.dir/clean
+
+CMakeFiles/lenet_tanh_half.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/lenet_tanh_half.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..8fbccf20b6033cb705183eeb2c8cea49fdd01ee4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o"
+  "lenet_tanh_half.pdb"
+  "lenet_tanh_half"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/lenet_tanh_half.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..84f058181b48c0ce1160ba79694d636ea646e099
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..8ef1bdbab7082da00db91de78a2b02751a2b83d5
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../dnn_sources/src/half/lenet_tanh_half.cc
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..accf8f622cae61034eec5579364fa19a717877bd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o  -o lenet_tanh_half  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..6ec2abf9db4adc26734f3497c2ff4710a130a0ae
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/lenet_tanh_half.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 21
+CMAKE_PROGRESS_2 = 22
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks
new file mode 100644
index 0000000000000000000000000000000000000000..6f4247a6255c99f420d1df558d68745592862ff7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks
@@ -0,0 +1 @@
+26
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..19fab2149bf120962a1699d74b7373348dc4c117
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake
@@ -0,0 +1,11 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  )
+# The set of files for implicit dependencies of each language:
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..851ed18eae0d54d5604bb55a0d3a9e72fd9fdedc
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/build.make
@@ -0,0 +1,465 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/tensor_runtime.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/tensor_runtime.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/tensor_runtime.dir/flags.make
+
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../dnn_sources/include/types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/debug.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/error.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_conversion.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_emu.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/fp16_gemm.cu
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/global_data.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/half_precision_api.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/op_overheads.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/profiling.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/include/tensor_utils.cu
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/src/tensor_runtime.cu
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_discrete.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_discrete2.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_globals.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_kernel.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_lognormal.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mrg32k3a.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mtgp32.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_mtgp32_kernel.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_normal.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_normal_static.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_philox4x32_x.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_poisson.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_precalc.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/curand_uniform.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /software/cuda-9.1/include/vector_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/_G_config.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/alloca.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/asm-generic/errno-base.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/asm-generic/errno.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/assert.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/algorithm
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/array
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/backward/auto_ptr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/backward/binders.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/algorithmfwd.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/alloc_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/allocated_ptr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/allocator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/atomic_base.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/atomic_lockfree_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_ios.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_ios.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_string.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/basic_string.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/char_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/codecvt.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/concept_check.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/cpp_type_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/cxxabi_forced.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/exception_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/exception_ptr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/functexcept.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/functional_hash.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hash_bytes.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hashtable.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/hashtable_policy.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ios_base.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/istream.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_classes.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_classes.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_conv.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets_nonio.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/locale_facets_nonio.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/localefwd.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/memoryfwd.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/move.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/nested_exception.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ostream.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ostream_insert.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/parse_numbers.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/postypes.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/predefined_ops.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/ptr_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/random.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/random.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/range_access.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr_atomic.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/shared_ptr_base.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/sstream.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_algo.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_algobase.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_bvector.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_construct.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_function.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_heap.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator_base_funcs.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_iterator_base_types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_map.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_multimap.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_numeric.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_pair.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_raw_storage_iter.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_relops.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_tempbuf.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_tree.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_uninitialized.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stl_vector.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/streambuf.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/streambuf_iterator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/stringfwd.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/uniform_int_dist.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/unique_ptr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/unordered_map.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/uses_allocator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/bits/vector.tcc
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cctype
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cerrno
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cfloat
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/chrono
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/clocale
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cmath
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdint
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdio
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstdlib
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cstring
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ctime
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cwchar
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/cwctype
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/debug/debug.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/exception
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/aligned_buffer.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/alloc_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/atomicity.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/concurrence.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/new_allocator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/numeric_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/string_conversions.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ext/type_traits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/functional
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/initializer_list
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iomanip
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ios
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iosfwd
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/iostream
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/istream
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/limits
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/locale
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/map
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/memory
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/new
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/numeric
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ostream
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/random
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/ratio
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/sstream
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/stdexcept
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/streambuf
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/string
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/system_error
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/tuple
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/type_traits
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/typeinfo
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/unordered_map
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/utility
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/c++/5/vector
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/ctype.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/endian.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/errno.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/features.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/libintl.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/libio.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/limits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/linux/errno.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/linux/limits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/locale.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/math.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/memory.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/pthread.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/sched.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdc-predef.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdint.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdio.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/stdlib.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/string.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/time.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/wchar.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/wctype.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/asm/errno.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/byteswap-16.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/byteswap.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/endian.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/errno.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_val.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_valf.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/huge_vall.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/inf.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/local_lim.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/locale.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/math-vector.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/mathcalls.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/mathdef.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/mathinline.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/nan.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/posix1_lim.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/posix2_lim.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sched.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/select.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/select2.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/setjmp.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sigset.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdio.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdio2.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdio_lim.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib-float.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/stdlib.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/string3.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/sys_errlist.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/time.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/timex.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/typesizes.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/waitflags.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/waitstatus.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/wchar.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/wchar2.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/wordsize.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/bits/xopen_lim.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/atomic_word.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++allocator.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++config.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/c++locale.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/cpu_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/ctype_base.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/ctype_inline.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/error_constants.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/gthr-default.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/gthr.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/messages_members.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/opt_random.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/os_defines.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/c++/5/bits/time_members.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/gnu/stubs-64.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/gnu/stubs.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/cdefs.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/select.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/sysmacros.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/x86_64-linux-gnu/sys/types.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/include/xlocale.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/limits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/syslimits.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/adxintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/ammintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512bwintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512cdintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512dqintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512erintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmaintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmavlintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512pfintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmiintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmivlintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlbwintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vldqintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/bmi2intrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/bmiintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/clflushoptintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/clwbintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/emmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/f16cintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/float.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fma4intrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/fxsrintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/ia32intrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/lwpintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/lzcntintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mm3dnow.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mm_malloc.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/mwaitxintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/pcommitintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/pmmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/popcntintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/prfchwintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/rdseedintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/rtmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/shaintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/smmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stdarg.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stddef.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/stdint.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/tbmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/tmmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/wmmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/x86intrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xmmintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xopintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsavecintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveoptintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xsavesintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: /usr/lib/gcc/x86_64-linux-gnu/5/include/xtestintrin.h
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake
+CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o: ../tensor_runtime/src/tensor_runtime.cu
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o"
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src && /usr/bin/cmake -E make_directory /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/.
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o -D generated_cubin_file:STRING=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o.cubin.txt -P /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake
+
+# Object files for target tensor_runtime
+tensor_runtime_OBJECTS =
+
+# External object files for target tensor_runtime
+tensor_runtime_EXTERNAL_OBJECTS = \
+"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o"
+
+libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o
+libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/build.make
+libtensor_runtime.a: CMakeFiles/tensor_runtime.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX static library libtensor_runtime.a"
+	$(CMAKE_COMMAND) -P CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/tensor_runtime.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/tensor_runtime.dir/build: libtensor_runtime.a
+
+.PHONY : CMakeFiles/tensor_runtime.dir/build
+
+CMakeFiles/tensor_runtime.dir/requires:
+
+.PHONY : CMakeFiles/tensor_runtime.dir/requires
+
+CMakeFiles/tensor_runtime.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/tensor_runtime.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/tensor_runtime.dir/clean
+
+CMakeFiles/tensor_runtime.dir/depend: CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/tensor_runtime.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..0fc2d57e88004858ee8b99a2b2209fb14f727016
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o"
+  "libtensor_runtime.pdb"
+  "libtensor_runtime.a"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang )
+  include(CMakeFiles/tensor_runtime.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..25929ee744ec9578314eeb618bd5dd37c2f609cb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/cmake_clean_target.cmake
@@ -0,0 +1,3 @@
+file(REMOVE_RECURSE
+  "libtensor_runtime.a"
+)
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.internal
@@ -0,0 +1,3 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/depend.make
@@ -0,0 +1,3 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..d827cd848fb68755571287c49dbfdebfa8ded06c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/flags.make
@@ -0,0 +1,3 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ad396e0e26887f38340329a79418b5335e3a585
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/link.txt
@@ -0,0 +1,2 @@
+/usr/bin/ar qc libtensor_runtime.a  CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o
+/usr/bin/ranlib libtensor_runtime.a
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..6c29f4fb5e35d30c7f60537a3bc9a6d7192f84b9
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 23
+CMAKE_PROGRESS_2 = 24
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..549795b7cc0257a54b9db3f3858c17dc65bc19f6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.cmake
@@ -0,0 +1,294 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+if(NOT generated_file)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Set these up as variables to make reading the generated file easier
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu") # path
+set(NVCC_generated_dependency_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.5/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.5/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path
+# We won't actually use these variables for now, but we need to set this, in
+# order to force this file to be run again if it changes.
+set(generated_file_path "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/.") # path
+set(generated_file_internal "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o") # path
+set(generated_cubin_file_internal "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/./tensor_runtime_generated_tensor_runtime.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/software/cuda-9.1/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS -gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-O3;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand ;; ) # list
+# Build specific configuration flags
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64;-DNO_INJECTION) # list
+set(CUDA_NVCC_INCLUDE_ARGS "-I/software/cuda-9.1/include;-I/software/cuda-9.1/lib64;-I/software/cuda-9.1/lib64/include;-I/software/cuda-9.1/include") # list (needs to be in quotes to handle spaces properly).
+set(format_flag "-c") # string
+set(cuda_language_flag ) # list
+
+if(build_cubin AND NOT generated_cubin_file)
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  The C or CXX flags should already
+# have been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS )
+
+# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration)
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
+  set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags)
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "$(VCInstallDir)bin" AND DEFINED CCBIN)
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# cuda_execute_process - Executes a command with optional command echo and status message.
+#
+#   status  - Status message to print if verbose is true
+#   command - COMMAND argument from the usual execute_process argument structure
+#   ARGN    - Remaining arguments are the command with arguments
+#
+#   CUDA_result - return value from running the command
+#
+# Make this a macro instead of a function, so that things like RESULT_VARIABLE
+# and other return variables are present after executing the process.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose)
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build up a printable copy of the command.  Only quotes and spaces are
+    # accounted for; anything else is left up to the user to fix if they want
+    # to copy and paste a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # If there are quotes, escape them, so they come through.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Args with spaces need quotes around them to get them to be parsed as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the reconstructed command line (display only; it is not what gets run).
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the original arguments; the exit status is stored in CUDA_result.
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete the target file
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 9.1)
+if(CUDA_VERSION VERSION_LESS "3.0")
+  cmake_policy(PUSH)
+  # CMake policy 0007 NEW states that empty list elements are not
+  # ignored.  I'm just setting it to avoid the warning that's printed.
+  cmake_policy(SET CMP0007 NEW)
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+  cmake_policy(POP)
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Don't put the
+# quotes just around the filenames for the input_file and output_file variables.
+# CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the file if it is different
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}")
+else()
+  if(verbose)
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands.
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Execute the parser script.
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend
new file mode 100644
index 0000000000000000000000000000000000000000..2f1e45707d14f4e9ff7f521aa571ff826d7173eb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/tensor_runtime/src/tensor_runtime_generated_tensor_runtime.cu.o.depend
@@ -0,0 +1,373 @@
+# Generated by: make2cmake.cmake
+SET(CUDA_NVCC_DEPEND
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu"
+ "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu"
+ "/software/cuda-9.1/include/builtin_types.h"
+ "/software/cuda-9.1/include/channel_descriptor.h"
+ "/software/cuda-9.1/include/common_functions.h"
+ "/software/cuda-9.1/include/crt/common_functions.h"
+ "/software/cuda-9.1/include/crt/device_double_functions.h"
+ "/software/cuda-9.1/include/crt/device_double_functions.hpp"
+ "/software/cuda-9.1/include/crt/device_functions.h"
+ "/software/cuda-9.1/include/crt/device_functions.hpp"
+ "/software/cuda-9.1/include/crt/host_config.h"
+ "/software/cuda-9.1/include/crt/host_defines.h"
+ "/software/cuda-9.1/include/crt/math_functions.h"
+ "/software/cuda-9.1/include/crt/math_functions.hpp"
+ "/software/cuda-9.1/include/crt/sm_70_rt.h"
+ "/software/cuda-9.1/include/crt/sm_70_rt.hpp"
+ "/software/cuda-9.1/include/cuComplex.h"
+ "/software/cuda-9.1/include/cublas_api.h"
+ "/software/cuda-9.1/include/cublas_v2.h"
+ "/software/cuda-9.1/include/cuda.h"
+ "/software/cuda-9.1/include/cuda_device_runtime_api.h"
+ "/software/cuda-9.1/include/cuda_fp16.h"
+ "/software/cuda-9.1/include/cuda_fp16.hpp"
+ "/software/cuda-9.1/include/cuda_runtime.h"
+ "/software/cuda-9.1/include/cuda_runtime_api.h"
+ "/software/cuda-9.1/include/cuda_surface_types.h"
+ "/software/cuda-9.1/include/cuda_texture_types.h"
+ "/software/cuda-9.1/include/cudnn.h"
+ "/software/cuda-9.1/include/curand.h"
+ "/software/cuda-9.1/include/curand_discrete.h"
+ "/software/cuda-9.1/include/curand_discrete2.h"
+ "/software/cuda-9.1/include/curand_globals.h"
+ "/software/cuda-9.1/include/curand_kernel.h"
+ "/software/cuda-9.1/include/curand_lognormal.h"
+ "/software/cuda-9.1/include/curand_mrg32k3a.h"
+ "/software/cuda-9.1/include/curand_mtgp32.h"
+ "/software/cuda-9.1/include/curand_mtgp32_kernel.h"
+ "/software/cuda-9.1/include/curand_normal.h"
+ "/software/cuda-9.1/include/curand_normal_static.h"
+ "/software/cuda-9.1/include/curand_philox4x32_x.h"
+ "/software/cuda-9.1/include/curand_poisson.h"
+ "/software/cuda-9.1/include/curand_precalc.h"
+ "/software/cuda-9.1/include/curand_uniform.h"
+ "/software/cuda-9.1/include/device_atomic_functions.h"
+ "/software/cuda-9.1/include/device_atomic_functions.hpp"
+ "/software/cuda-9.1/include/device_functions.h"
+ "/software/cuda-9.1/include/device_launch_parameters.h"
+ "/software/cuda-9.1/include/device_types.h"
+ "/software/cuda-9.1/include/driver_functions.h"
+ "/software/cuda-9.1/include/driver_types.h"
+ "/software/cuda-9.1/include/host_config.h"
+ "/software/cuda-9.1/include/host_defines.h"
+ "/software/cuda-9.1/include/library_types.h"
+ "/software/cuda-9.1/include/sm_20_atomic_functions.h"
+ "/software/cuda-9.1/include/sm_20_atomic_functions.hpp"
+ "/software/cuda-9.1/include/sm_20_intrinsics.h"
+ "/software/cuda-9.1/include/sm_20_intrinsics.hpp"
+ "/software/cuda-9.1/include/sm_30_intrinsics.h"
+ "/software/cuda-9.1/include/sm_30_intrinsics.hpp"
+ "/software/cuda-9.1/include/sm_32_atomic_functions.h"
+ "/software/cuda-9.1/include/sm_32_atomic_functions.hpp"
+ "/software/cuda-9.1/include/sm_32_intrinsics.h"
+ "/software/cuda-9.1/include/sm_32_intrinsics.hpp"
+ "/software/cuda-9.1/include/sm_35_atomic_functions.h"
+ "/software/cuda-9.1/include/sm_35_intrinsics.h"
+ "/software/cuda-9.1/include/sm_60_atomic_functions.h"
+ "/software/cuda-9.1/include/sm_60_atomic_functions.hpp"
+ "/software/cuda-9.1/include/sm_61_intrinsics.h"
+ "/software/cuda-9.1/include/sm_61_intrinsics.hpp"
+ "/software/cuda-9.1/include/surface_functions.h"
+ "/software/cuda-9.1/include/surface_indirect_functions.h"
+ "/software/cuda-9.1/include/surface_types.h"
+ "/software/cuda-9.1/include/texture_fetch_functions.h"
+ "/software/cuda-9.1/include/texture_indirect_functions.h"
+ "/software/cuda-9.1/include/texture_types.h"
+ "/software/cuda-9.1/include/vector_functions.h"
+ "/software/cuda-9.1/include/vector_functions.hpp"
+ "/software/cuda-9.1/include/vector_types.h"
+ "/usr/include/_G_config.h"
+ "/usr/include/alloca.h"
+ "/usr/include/asm-generic/errno-base.h"
+ "/usr/include/asm-generic/errno.h"
+ "/usr/include/assert.h"
+ "/usr/include/c++/5/algorithm"
+ "/usr/include/c++/5/array"
+ "/usr/include/c++/5/backward/auto_ptr.h"
+ "/usr/include/c++/5/backward/binders.h"
+ "/usr/include/c++/5/bits/algorithmfwd.h"
+ "/usr/include/c++/5/bits/alloc_traits.h"
+ "/usr/include/c++/5/bits/allocated_ptr.h"
+ "/usr/include/c++/5/bits/allocator.h"
+ "/usr/include/c++/5/bits/atomic_base.h"
+ "/usr/include/c++/5/bits/atomic_lockfree_defines.h"
+ "/usr/include/c++/5/bits/basic_ios.h"
+ "/usr/include/c++/5/bits/basic_ios.tcc"
+ "/usr/include/c++/5/bits/basic_string.h"
+ "/usr/include/c++/5/bits/basic_string.tcc"
+ "/usr/include/c++/5/bits/char_traits.h"
+ "/usr/include/c++/5/bits/codecvt.h"
+ "/usr/include/c++/5/bits/concept_check.h"
+ "/usr/include/c++/5/bits/cpp_type_traits.h"
+ "/usr/include/c++/5/bits/cxxabi_forced.h"
+ "/usr/include/c++/5/bits/exception_defines.h"
+ "/usr/include/c++/5/bits/exception_ptr.h"
+ "/usr/include/c++/5/bits/functexcept.h"
+ "/usr/include/c++/5/bits/functional_hash.h"
+ "/usr/include/c++/5/bits/hash_bytes.h"
+ "/usr/include/c++/5/bits/hashtable.h"
+ "/usr/include/c++/5/bits/hashtable_policy.h"
+ "/usr/include/c++/5/bits/ios_base.h"
+ "/usr/include/c++/5/bits/istream.tcc"
+ "/usr/include/c++/5/bits/locale_classes.h"
+ "/usr/include/c++/5/bits/locale_classes.tcc"
+ "/usr/include/c++/5/bits/locale_conv.h"
+ "/usr/include/c++/5/bits/locale_facets.h"
+ "/usr/include/c++/5/bits/locale_facets.tcc"
+ "/usr/include/c++/5/bits/locale_facets_nonio.h"
+ "/usr/include/c++/5/bits/locale_facets_nonio.tcc"
+ "/usr/include/c++/5/bits/localefwd.h"
+ "/usr/include/c++/5/bits/memoryfwd.h"
+ "/usr/include/c++/5/bits/move.h"
+ "/usr/include/c++/5/bits/nested_exception.h"
+ "/usr/include/c++/5/bits/ostream.tcc"
+ "/usr/include/c++/5/bits/ostream_insert.h"
+ "/usr/include/c++/5/bits/parse_numbers.h"
+ "/usr/include/c++/5/bits/postypes.h"
+ "/usr/include/c++/5/bits/predefined_ops.h"
+ "/usr/include/c++/5/bits/ptr_traits.h"
+ "/usr/include/c++/5/bits/random.h"
+ "/usr/include/c++/5/bits/random.tcc"
+ "/usr/include/c++/5/bits/range_access.h"
+ "/usr/include/c++/5/bits/shared_ptr.h"
+ "/usr/include/c++/5/bits/shared_ptr_atomic.h"
+ "/usr/include/c++/5/bits/shared_ptr_base.h"
+ "/usr/include/c++/5/bits/sstream.tcc"
+ "/usr/include/c++/5/bits/stl_algo.h"
+ "/usr/include/c++/5/bits/stl_algobase.h"
+ "/usr/include/c++/5/bits/stl_bvector.h"
+ "/usr/include/c++/5/bits/stl_construct.h"
+ "/usr/include/c++/5/bits/stl_function.h"
+ "/usr/include/c++/5/bits/stl_heap.h"
+ "/usr/include/c++/5/bits/stl_iterator.h"
+ "/usr/include/c++/5/bits/stl_iterator_base_funcs.h"
+ "/usr/include/c++/5/bits/stl_iterator_base_types.h"
+ "/usr/include/c++/5/bits/stl_map.h"
+ "/usr/include/c++/5/bits/stl_multimap.h"
+ "/usr/include/c++/5/bits/stl_numeric.h"
+ "/usr/include/c++/5/bits/stl_pair.h"
+ "/usr/include/c++/5/bits/stl_raw_storage_iter.h"
+ "/usr/include/c++/5/bits/stl_relops.h"
+ "/usr/include/c++/5/bits/stl_tempbuf.h"
+ "/usr/include/c++/5/bits/stl_tree.h"
+ "/usr/include/c++/5/bits/stl_uninitialized.h"
+ "/usr/include/c++/5/bits/stl_vector.h"
+ "/usr/include/c++/5/bits/streambuf.tcc"
+ "/usr/include/c++/5/bits/streambuf_iterator.h"
+ "/usr/include/c++/5/bits/stringfwd.h"
+ "/usr/include/c++/5/bits/uniform_int_dist.h"
+ "/usr/include/c++/5/bits/unique_ptr.h"
+ "/usr/include/c++/5/bits/unordered_map.h"
+ "/usr/include/c++/5/bits/uses_allocator.h"
+ "/usr/include/c++/5/bits/vector.tcc"
+ "/usr/include/c++/5/cctype"
+ "/usr/include/c++/5/cerrno"
+ "/usr/include/c++/5/cfloat"
+ "/usr/include/c++/5/chrono"
+ "/usr/include/c++/5/clocale"
+ "/usr/include/c++/5/cmath"
+ "/usr/include/c++/5/cstdint"
+ "/usr/include/c++/5/cstdio"
+ "/usr/include/c++/5/cstdlib"
+ "/usr/include/c++/5/cstring"
+ "/usr/include/c++/5/ctime"
+ "/usr/include/c++/5/cwchar"
+ "/usr/include/c++/5/cwctype"
+ "/usr/include/c++/5/debug/debug.h"
+ "/usr/include/c++/5/exception"
+ "/usr/include/c++/5/ext/aligned_buffer.h"
+ "/usr/include/c++/5/ext/alloc_traits.h"
+ "/usr/include/c++/5/ext/atomicity.h"
+ "/usr/include/c++/5/ext/concurrence.h"
+ "/usr/include/c++/5/ext/new_allocator.h"
+ "/usr/include/c++/5/ext/numeric_traits.h"
+ "/usr/include/c++/5/ext/string_conversions.h"
+ "/usr/include/c++/5/ext/type_traits.h"
+ "/usr/include/c++/5/functional"
+ "/usr/include/c++/5/initializer_list"
+ "/usr/include/c++/5/iomanip"
+ "/usr/include/c++/5/ios"
+ "/usr/include/c++/5/iosfwd"
+ "/usr/include/c++/5/iostream"
+ "/usr/include/c++/5/istream"
+ "/usr/include/c++/5/limits"
+ "/usr/include/c++/5/locale"
+ "/usr/include/c++/5/map"
+ "/usr/include/c++/5/memory"
+ "/usr/include/c++/5/new"
+ "/usr/include/c++/5/numeric"
+ "/usr/include/c++/5/ostream"
+ "/usr/include/c++/5/random"
+ "/usr/include/c++/5/ratio"
+ "/usr/include/c++/5/sstream"
+ "/usr/include/c++/5/stdexcept"
+ "/usr/include/c++/5/streambuf"
+ "/usr/include/c++/5/string"
+ "/usr/include/c++/5/system_error"
+ "/usr/include/c++/5/tuple"
+ "/usr/include/c++/5/type_traits"
+ "/usr/include/c++/5/typeinfo"
+ "/usr/include/c++/5/unordered_map"
+ "/usr/include/c++/5/utility"
+ "/usr/include/c++/5/vector"
+ "/usr/include/ctype.h"
+ "/usr/include/endian.h"
+ "/usr/include/errno.h"
+ "/usr/include/features.h"
+ "/usr/include/libintl.h"
+ "/usr/include/libio.h"
+ "/usr/include/limits.h"
+ "/usr/include/linux/errno.h"
+ "/usr/include/linux/limits.h"
+ "/usr/include/locale.h"
+ "/usr/include/math.h"
+ "/usr/include/memory.h"
+ "/usr/include/pthread.h"
+ "/usr/include/sched.h"
+ "/usr/include/stdc-predef.h"
+ "/usr/include/stdint.h"
+ "/usr/include/stdio.h"
+ "/usr/include/stdlib.h"
+ "/usr/include/string.h"
+ "/usr/include/time.h"
+ "/usr/include/wchar.h"
+ "/usr/include/wctype.h"
+ "/usr/include/x86_64-linux-gnu/asm/errno.h"
+ "/usr/include/x86_64-linux-gnu/bits/byteswap-16.h"
+ "/usr/include/x86_64-linux-gnu/bits/byteswap.h"
+ "/usr/include/x86_64-linux-gnu/bits/endian.h"
+ "/usr/include/x86_64-linux-gnu/bits/errno.h"
+ "/usr/include/x86_64-linux-gnu/bits/huge_val.h"
+ "/usr/include/x86_64-linux-gnu/bits/huge_valf.h"
+ "/usr/include/x86_64-linux-gnu/bits/huge_vall.h"
+ "/usr/include/x86_64-linux-gnu/bits/inf.h"
+ "/usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h"
+ "/usr/include/x86_64-linux-gnu/bits/local_lim.h"
+ "/usr/include/x86_64-linux-gnu/bits/locale.h"
+ "/usr/include/x86_64-linux-gnu/bits/math-vector.h"
+ "/usr/include/x86_64-linux-gnu/bits/mathcalls.h"
+ "/usr/include/x86_64-linux-gnu/bits/mathdef.h"
+ "/usr/include/x86_64-linux-gnu/bits/mathinline.h"
+ "/usr/include/x86_64-linux-gnu/bits/nan.h"
+ "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h"
+ "/usr/include/x86_64-linux-gnu/bits/posix2_lim.h"
+ "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h"
+ "/usr/include/x86_64-linux-gnu/bits/sched.h"
+ "/usr/include/x86_64-linux-gnu/bits/select.h"
+ "/usr/include/x86_64-linux-gnu/bits/select2.h"
+ "/usr/include/x86_64-linux-gnu/bits/setjmp.h"
+ "/usr/include/x86_64-linux-gnu/bits/sigset.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdio.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdio2.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h"
+ "/usr/include/x86_64-linux-gnu/bits/stdlib.h"
+ "/usr/include/x86_64-linux-gnu/bits/string3.h"
+ "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h"
+ "/usr/include/x86_64-linux-gnu/bits/time.h"
+ "/usr/include/x86_64-linux-gnu/bits/timex.h"
+ "/usr/include/x86_64-linux-gnu/bits/types.h"
+ "/usr/include/x86_64-linux-gnu/bits/typesizes.h"
+ "/usr/include/x86_64-linux-gnu/bits/waitflags.h"
+ "/usr/include/x86_64-linux-gnu/bits/waitstatus.h"
+ "/usr/include/x86_64-linux-gnu/bits/wchar.h"
+ "/usr/include/x86_64-linux-gnu/bits/wchar2.h"
+ "/usr/include/x86_64-linux-gnu/bits/wordsize.h"
+ "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/atomic_word.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/c++allocator.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/c++config.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/c++locale.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/cpu_defines.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/ctype_base.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/ctype_inline.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/error_constants.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/gthr-default.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/gthr.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/messages_members.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/opt_random.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/os_defines.h"
+ "/usr/include/x86_64-linux-gnu/c++/5/bits/time_members.h"
+ "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h"
+ "/usr/include/x86_64-linux-gnu/gnu/stubs.h"
+ "/usr/include/x86_64-linux-gnu/sys/cdefs.h"
+ "/usr/include/x86_64-linux-gnu/sys/select.h"
+ "/usr/include/x86_64-linux-gnu/sys/sysmacros.h"
+ "/usr/include/x86_64-linux-gnu/sys/types.h"
+ "/usr/include/xlocale.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/limits.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include-fixed/syslimits.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/adxintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/ammintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512bwintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512cdintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512dqintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512erintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmaintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512ifmavlintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512pfintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmiintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vbmivlintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlbwintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vldqintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512vlintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/bmi2intrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/bmiintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/clflushoptintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/clwbintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/emmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/f16cintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/float.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/fma4intrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/fxsrintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/ia32intrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/lwpintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/lzcntintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/mm3dnow.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/mm_malloc.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/mmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/mwaitxintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/pcommitintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/pmmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/popcntintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/prfchwintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/rdseedintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/rtmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/shaintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/smmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/stdarg.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/stddef.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/stdint.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/tbmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/tmmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/wmmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/x86intrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xmmintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xopintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsavecintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsaveoptintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xsavesintrin.h"
+ "/usr/lib/gcc/x86_64-linux-gnu/5/include/xtestintrin.h"
+)
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache
new file mode 100644
index 0000000000000000000000000000000000000000..f1de1eb76c952e08c1055a7b226a3b20cb722e8b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/CXX.includecache
@@ -0,0 +1,602 @@
+#IncludeRegexLine: ^[ 	]*#[ 	]*(include|import)[ 	]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform: 
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+sstream
+-
+../../tensor_runtime/include/tensor.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+types.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
+stdio.h
+-
+stdlib.h
+-
+unistd.h
+-
+../../tensor_runtime/include/tensor_runtime.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+../include/utils.h
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+cuda_runtime.h
+-
+device_launch_parameters.h
+-
+cublas_v2.h
+-
+cudnn.h
+-
+cublas_api.h
+-
+cuda_fp16.h
+-
+driver_types.h
+-
+
+/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+stdio.h
+-
+cstdlib
+-
+cmath
+-
+memory
+-
+string
+-
+
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/common_functions.h
+crt/common_functions.h
+/software/cuda-9.1/include/crt/common_functions.h
+
+/software/cuda-9.1/include/crt/common_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+string.h
+-
+time.h
+-
+new
+-
+stdio.h
+-
+stdlib.h
+-
+assert.h
+-
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/crt/cuda_device_runtime_api.h
+math_functions.h
+/software/cuda-9.1/include/crt/math_functions.h
+
+/software/cuda-9.1/include/crt/device_double_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_double_functions.hpp
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+
+/software/cuda-9.1/include/crt/device_double_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/device_functions.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+device_functions.hpp
+/software/cuda-9.1/include/crt/device_functions.hpp
+device_atomic_functions.h
+/software/cuda-9.1/include/crt/device_atomic_functions.h
+device_double_functions.h
+/software/cuda-9.1/include/crt/device_double_functions.h
+sm_20_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_20_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_32_atomic_functions.h
+sm_35_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_35_atomic_functions.h
+sm_60_atomic_functions.h
+/software/cuda-9.1/include/crt/sm_60_atomic_functions.h
+sm_20_intrinsics.h
+/software/cuda-9.1/include/crt/sm_20_intrinsics.h
+sm_30_intrinsics.h
+/software/cuda-9.1/include/crt/sm_30_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/crt/sm_32_intrinsics.h
+sm_35_intrinsics.h
+/software/cuda-9.1/include/crt/sm_35_intrinsics.h
+sm_61_intrinsics.h
+/software/cuda-9.1/include/crt/sm_61_intrinsics.h
+sm_70_rt.h
+/software/cuda-9.1/include/crt/sm_70_rt.h
+surface_functions.h
+/software/cuda-9.1/include/crt/surface_functions.h
+texture_fetch_functions.h
+/software/cuda-9.1/include/crt/texture_fetch_functions.h
+texture_indirect_functions.h
+/software/cuda-9.1/include/crt/texture_indirect_functions.h
+surface_indirect_functions.h
+/software/cuda-9.1/include/crt/surface_indirect_functions.h
+
+/software/cuda-9.1/include/crt/device_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/func_macro.h
+
+/software/cuda-9.1/include/crt/host_config.h
+features.h
+-
+crtdefs.h
+-
+corecrt.h
+-
+cstdarg
+-
+
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/crt/math_functions.h
+__config
+-
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math.h
+-
+stdlib.h
+-
+cmath
+-
+cstdlib
+-
+crt/func_macro.h
+-
+math_functions.hpp
+/software/cuda-9.1/include/crt/math_functions.hpp
+
+/software/cuda-9.1/include/crt/math_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+math_constants.h
+/software/cuda-9.1/include/crt/math_constants.h
+crt/func_macro.h
+-
+
+/software/cuda-9.1/include/crt/sm_70_rt.h
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+sm_70_rt.hpp
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+
+/software/cuda-9.1/include/crt/sm_70_rt.hpp
+builtin_types.h
+/software/cuda-9.1/include/crt/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/crt/device_types.h
+host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/cuComplex.h
+math.h
+-
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/cublas_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuComplex.h
+/software/cuda-9.1/include/cuComplex.h
+cuda_fp16.h
+-
+library_types.h
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/cublas_v2.h
+cublas_api.h
+/software/cuda-9.1/include/cublas_api.h
+
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/cuda_fp16.h
+cuda_fp16.hpp
+/software/cuda-9.1/include/cuda_fp16.hpp
+
+/software/cuda-9.1/include/cuda_fp16.hpp
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime.h
+host_config.h
+/software/cuda-9.1/include/host_config.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+library_types.h
+/software/cuda-9.1/include/library_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+cuda_runtime_api.h
+/software/cuda-9.1/include/cuda_runtime_api.h
+driver_functions.h
+/software/cuda-9.1/include/driver_functions.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_functions.h
+/software/cuda-9.1/include/vector_functions.h
+nvrtc_device_runtime.h
+/software/cuda-9.1/include/nvrtc_device_runtime.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+common_functions.h
+/software/cuda-9.1/include/common_functions.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+device_functions.h
+/software/cuda-9.1/include/device_functions.h
+device_launch_parameters.h
+/software/cuda-9.1/include/device_launch_parameters.h
+functional
+-
+utility
+-
+
+/software/cuda-9.1/include/cuda_runtime_api.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_device_runtime_api.h
+/software/cuda-9.1/include/cuda_device_runtime_api.h
+
+/software/cuda-9.1/include/cuda_surface_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/cuda_texture_types.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+channel_descriptor.h
+/software/cuda-9.1/include/channel_descriptor.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/cudnn.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+cuda_runtime.h
+-
+
+/software/cuda-9.1/include/device_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+device_atomic_functions.hpp
+/software/cuda-9.1/include/device_atomic_functions.hpp
+
+/software/cuda-9.1/include/device_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/device_functions.h
+crt/device_functions.h
+/software/cuda-9.1/include/crt/device_functions.h
+
+/software/cuda-9.1/include/device_launch_parameters.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/driver_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/driver_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+limits.h
+-
+stddef.h
+-
+
+/software/cuda-9.1/include/host_config.h
+crt/host_config.h
+/software/cuda-9.1/include/crt/host_config.h
+
+/software/cuda-9.1/include/host_defines.h
+crt/host_defines.h
+/software/cuda-9.1/include/crt/host_defines.h
+
+/software/cuda-9.1/include/library_types.h
+
+/software/cuda-9.1/include/math_constants.h
+
+/software/cuda-9.1/include/sm_20_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_atomic_functions.hpp
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_20_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_20_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_20_intrinsics.hpp
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_20_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_30_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_30_intrinsics.hpp
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_30_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_atomic_functions.hpp
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_32_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_32_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_32_intrinsics.hpp
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_32_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_35_atomic_functions.h
+sm_32_atomic_functions.h
+/software/cuda-9.1/include/sm_32_atomic_functions.h
+
+/software/cuda-9.1/include/sm_35_intrinsics.h
+sm_32_intrinsics.h
+/software/cuda-9.1/include/sm_32_intrinsics.h
+
+/software/cuda-9.1/include/sm_60_atomic_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_60_atomic_functions.hpp
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+
+/software/cuda-9.1/include/sm_60_atomic_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/sm_61_intrinsics.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+sm_61_intrinsics.hpp
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+
+/software/cuda-9.1/include/sm_61_intrinsics.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+device_types.h
+/software/cuda-9.1/include/device_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_surface_types.h
+/software/cuda-9.1/include/cuda_surface_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+surface_types.h
+/software/cuda-9.1/include/surface_types.h
+
+/software/cuda-9.1/include/surface_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/surface_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/texture_fetch_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+cuda_texture_types.h
+/software/cuda-9.1/include/cuda_texture_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+texture_types.h
+/software/cuda-9.1/include/texture_types.h
+
+/software/cuda-9.1/include/texture_indirect_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
+/software/cuda-9.1/include/texture_types.h
+driver_types.h
+/software/cuda-9.1/include/driver_types.h
+
+/software/cuda-9.1/include/vector_functions.h
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+vector_functions.hpp
+/software/cuda-9.1/include/vector_functions.hpp
+
+/software/cuda-9.1/include/vector_functions.hpp
+builtin_types.h
+/software/cuda-9.1/include/builtin_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+vector_types.h
+/software/cuda-9.1/include/vector_types.h
+
+/software/cuda-9.1/include/vector_types.h
+host_defines.h
+/software/cuda-9.1/include/host_defines.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ada151a56820ec3f45cc29fe1047cbc6e0bc4dd0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake
@@ -0,0 +1,29 @@
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  "CXX"
+  )
+# The set of files for implicit dependencies of each language:
+set(CMAKE_DEPENDS_CHECK_CXX
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc" "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o"
+  )
+set(CMAKE_CXX_COMPILER_ID "GNU")
+
+# Preprocessor definitions for this target.
+set(CMAKE_TARGET_DEFINITIONS_CXX
+  "NO_INJECTION"
+  )
+
+# The include file search paths:
+set(CMAKE_CXX_TARGET_INCLUDE_PATH
+  "/software/cuda-9.1/lib64"
+  "/software/cuda-9.1/lib64/include"
+  "/software/cuda-9.1/include"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/tensor_runtime.dir/DependInfo.cmake"
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make
new file mode 100644
index 0000000000000000000000000000000000000000..5c43744f7421581364eff3f09324e7ab2e4605d0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/build.make
@@ -0,0 +1,117 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/test_ops.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/test_ops.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/test_ops.dir/flags.make
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: CMakeFiles/test_ops.dir/flags.make
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/src/test_ops.cc
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o"
+	/usr/bin/c++   $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o -c /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc > CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s"
+	/usr/bin/c++  $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc -o CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires:
+
+.PHONY : CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides.build
+.PHONY : CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.provides.build: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o
+
+
+# Object files for target test_ops
+test_ops_OBJECTS = \
+"CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o"
+
+# External object files for target test_ops
+test_ops_EXTERNAL_OBJECTS =
+
+test_ops: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o
+test_ops: CMakeFiles/test_ops.dir/build.make
+test_ops: libtensor_runtime.a
+test_ops: /software/cuda-9.1/lib64/libcudart_static.a
+test_ops: /usr/lib/x86_64-linux-gnu/librt.so
+test_ops: /software/cuda-9.1/lib64/libcublas.so
+test_ops: CMakeFiles/test_ops.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable test_ops"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/test_ops.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/test_ops.dir/build: test_ops
+
+.PHONY : CMakeFiles/test_ops.dir/build
+
+CMakeFiles/test_ops.dir/requires: CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o.requires
+
+.PHONY : CMakeFiles/test_ops.dir/requires
+
+CMakeFiles/test_ops.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/test_ops.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/test_ops.dir/clean
+
+CMakeFiles/test_ops.dir/depend:
+	cd /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/test_ops.dir/depend
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..8b746a56138925ea678e94e7c8d20b911a5f6197
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o"
+  "test_ops.pdb"
+  "test_ops"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/test_ops.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal
new file mode 100644
index 0000000000000000000000000000000000000000..d786512ef56577e0da723c0e49ee5dd6454d0c33
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.internal
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
+ /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+ /software/cuda-9.1/include/builtin_types.h
+ /software/cuda-9.1/include/channel_descriptor.h
+ /software/cuda-9.1/include/common_functions.h
+ /software/cuda-9.1/include/crt/common_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.h
+ /software/cuda-9.1/include/crt/device_double_functions.hpp
+ /software/cuda-9.1/include/crt/device_functions.h
+ /software/cuda-9.1/include/crt/device_functions.hpp
+ /software/cuda-9.1/include/crt/func_macro.h
+ /software/cuda-9.1/include/crt/host_config.h
+ /software/cuda-9.1/include/crt/host_defines.h
+ /software/cuda-9.1/include/crt/math_functions.h
+ /software/cuda-9.1/include/crt/math_functions.hpp
+ /software/cuda-9.1/include/crt/sm_70_rt.h
+ /software/cuda-9.1/include/crt/sm_70_rt.hpp
+ /software/cuda-9.1/include/cuComplex.h
+ /software/cuda-9.1/include/cublas_api.h
+ /software/cuda-9.1/include/cublas_v2.h
+ /software/cuda-9.1/include/cuda_device_runtime_api.h
+ /software/cuda-9.1/include/cuda_fp16.h
+ /software/cuda-9.1/include/cuda_fp16.hpp
+ /software/cuda-9.1/include/cuda_runtime.h
+ /software/cuda-9.1/include/cuda_runtime_api.h
+ /software/cuda-9.1/include/cuda_surface_types.h
+ /software/cuda-9.1/include/cuda_texture_types.h
+ /software/cuda-9.1/include/cudnn.h
+ /software/cuda-9.1/include/device_atomic_functions.h
+ /software/cuda-9.1/include/device_atomic_functions.hpp
+ /software/cuda-9.1/include/device_functions.h
+ /software/cuda-9.1/include/device_launch_parameters.h
+ /software/cuda-9.1/include/device_types.h
+ /software/cuda-9.1/include/driver_functions.h
+ /software/cuda-9.1/include/driver_types.h
+ /software/cuda-9.1/include/host_config.h
+ /software/cuda-9.1/include/host_defines.h
+ /software/cuda-9.1/include/library_types.h
+ /software/cuda-9.1/include/math_constants.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.h
+ /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_20_intrinsics.h
+ /software/cuda-9.1/include/sm_20_intrinsics.hpp
+ /software/cuda-9.1/include/sm_30_intrinsics.h
+ /software/cuda-9.1/include/sm_30_intrinsics.hpp
+ /software/cuda-9.1/include/sm_32_atomic_functions.h
+ /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_32_intrinsics.h
+ /software/cuda-9.1/include/sm_32_intrinsics.hpp
+ /software/cuda-9.1/include/sm_35_atomic_functions.h
+ /software/cuda-9.1/include/sm_35_intrinsics.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.h
+ /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+ /software/cuda-9.1/include/sm_61_intrinsics.h
+ /software/cuda-9.1/include/sm_61_intrinsics.hpp
+ /software/cuda-9.1/include/surface_functions.h
+ /software/cuda-9.1/include/surface_indirect_functions.h
+ /software/cuda-9.1/include/surface_types.h
+ /software/cuda-9.1/include/texture_fetch_functions.h
+ /software/cuda-9.1/include/texture_indirect_functions.h
+ /software/cuda-9.1/include/texture_types.h
+ /software/cuda-9.1/include/vector_functions.h
+ /software/cuda-9.1/include/vector_functions.hpp
+ /software/cuda-9.1/include/vector_types.h
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make
new file mode 100644
index 0000000000000000000000000000000000000000..86ecbbbd064b25e07fe9510e44340d64070eb592
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/depend.make
@@ -0,0 +1,71 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/include/types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/include/utils.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../dnn_sources/src/test_ops.cc
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../tensor_runtime/include/tensor.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: ../tensor_runtime/include/tensor_runtime.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/builtin_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/channel_descriptor.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/common_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/common_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_double_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_double_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/device_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/func_macro.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/host_config.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/host_defines.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/math_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/math_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/crt/sm_70_rt.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuComplex.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cublas_api.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cublas_v2.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_device_runtime_api.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_fp16.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_fp16.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_runtime.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_runtime_api.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_surface_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cuda_texture_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/cudnn.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_atomic_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_atomic_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_launch_parameters.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/device_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/driver_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/driver_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/host_config.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/host_defines.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/library_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/math_constants.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_atomic_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_20_intrinsics.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_30_intrinsics.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_atomic_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_32_intrinsics.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_35_atomic_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_35_intrinsics.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_60_atomic_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/sm_61_intrinsics.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_indirect_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/surface_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_fetch_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_indirect_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/texture_types.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_functions.h
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_functions.hpp
+CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o: /software/cuda-9.1/include/vector_types.h
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make
new file mode 100644
index 0000000000000000000000000000000000000000..7f3e38e9dde5ac65083aa99a137ec856b355bbfe
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# compile CXX with /usr/bin/c++
+CXX_FLAGS =  -std=c++11   
+
+CXX_DEFINES = -DNO_INJECTION
+
+CXX_INCLUDES = -I/software/cuda-9.1/lib64 -I/software/cuda-9.1/lib64/include -I/software/cuda-9.1/include 
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1bcafeb846353a7ae25f096bbf90c6af9ac1e476
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++    -std=c++11    CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o  -o test_ops  -L/software/cuda-9.1/lib64  -L/software/cuda-9.1/lib64/lib  -L/software/cuda-9.1/lib64/lib64 -rdynamic libtensor_runtime.a /software/cuda-9.1/lib64/libcudart_static.a -lpthread -ldl -lrt /software/cuda-9.1/lib64/libcublas.so -lcudnn -lcurand -Wl,-rpath,/software/cuda-9.1/lib64:/software/cuda-9.1/lib64/lib:/software/cuda-9.1/lib64/lib64 
diff --git a/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make
new file mode 100644
index 0000000000000000000000000000000000000000..9fd0bf530ff91ac241731c4d5429c9b46c9d34a9
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/test_ops.dir/progress.make
@@ -0,0 +1,3 @@
+CMAKE_PROGRESS_1 = 25
+CMAKE_PROGRESS_2 = 26
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/Makefile b/llvm/projects/hpvm-tensor-rt/build/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..f79ccd9d8b79aeef44733439fbb2af41d46d1f56
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/Makefile
@@ -0,0 +1,676 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.5
+
+# Default target executed when no arguments are given to make.
+default_target: all
+
+.PHONY : default_target
+
+# Allow only one "make -f Makefile2" at a time, but pass parallelism.
+.NOTPARALLEL:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build
+
+#=============================================================================
+# Targets provided globally by CMake.
+
+# Special rule for the target edit_cache
+edit_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
+	/usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
+.PHONY : edit_cache
+
+# Special rule for the target edit_cache
+edit_cache/fast: edit_cache
+
+.PHONY : edit_cache/fast
+
+# Special rule for the target rebuild_cache
+rebuild_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
+	/usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : rebuild_cache
+
+# Special rule for the target rebuild_cache
+rebuild_cache/fast: rebuild_cache
+
+.PHONY : rebuild_cache/fast
+
+# The main all target
+all: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles/progress.marks
+	$(MAKE) -f CMakeFiles/Makefile2 all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/CMakeFiles 0
+.PHONY : all
+
+# The main clean target
+clean:
+	$(MAKE) -f CMakeFiles/Makefile2 clean
+.PHONY : clean
+
+# The main clean target
+clean/fast: clean
+
+.PHONY : clean/fast
+
+# Prepare targets for installation.
+preinstall: all
+	$(MAKE) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall
+
+# Prepare targets for installation.
+preinstall/fast:
+	$(MAKE) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall/fast
+
+# clear depends
+depend:
+	$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
+.PHONY : depend
+
+#=============================================================================
+# Target rules for targets named lenet_keras_half
+
+# Build rule for target.
+lenet_keras_half: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 lenet_keras_half
+.PHONY : lenet_keras_half
+
+# fast build rule for target.
+lenet_keras_half/fast:
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/build
+.PHONY : lenet_keras_half/fast
+
+#=============================================================================
+# Target rules for targets named lenet_tanh_half
+
+# Build rule for target.
+lenet_tanh_half: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 lenet_tanh_half
+.PHONY : lenet_tanh_half
+
+# fast build rule for target.
+lenet_tanh_half/fast:
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/build
+.PHONY : lenet_tanh_half/fast
+
+#=============================================================================
+# Target rules for targets named fc4_half
+
+# Build rule for target.
+fc4_half: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc4_half
+.PHONY : fc4_half
+
+# fast build rule for target.
+fc4_half/fast:
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/build
+.PHONY : fc4_half/fast
+
+#=============================================================================
+# Target rules for targets named fc3_clipped
+
+# Build rule for target.
+fc3_clipped: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc3_clipped
+.PHONY : fc3_clipped
+
+# fast build rule for target.
+fc3_clipped/fast:
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/build
+.PHONY : fc3_clipped/fast
+
+#=============================================================================
+# Target rules for targets named fc2_clipped
+
+# Build rule for target.
+fc2_clipped: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc2_clipped
+.PHONY : fc2_clipped
+
+# fast build rule for target.
+fc2_clipped/fast:
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/build
+.PHONY : fc2_clipped/fast
+
+#=============================================================================
+# Target rules for targets named test_ops
+
+# Build rule for target.
+test_ops: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 test_ops
+.PHONY : test_ops
+
+# fast build rule for target.
+test_ops/fast:
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/build
+.PHONY : test_ops/fast
+
+#=============================================================================
+# Target rules for targets named fc3_half
+
+# Build rule for target.
+fc3_half: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc3_half
+.PHONY : fc3_half
+
+# fast build rule for target.
+fc3_half/fast:
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/build
+.PHONY : fc3_half/fast
+
+#=============================================================================
+# Target rules for targets named tensor_runtime
+
+# Build rule for target.
+tensor_runtime: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 tensor_runtime
+.PHONY : tensor_runtime
+
+# fast build rule for target.
+tensor_runtime/fast:
+	$(MAKE) -f CMakeFiles/tensor_runtime.dir/build.make CMakeFiles/tensor_runtime.dir/build
+.PHONY : tensor_runtime/fast
+
+#=============================================================================
+# Target rules for targets named fc4_clipped
+
+# Build rule for target.
+fc4_clipped: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc4_clipped
+.PHONY : fc4_clipped
+
+# fast build rule for target.
+fc4_clipped/fast:
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/build
+.PHONY : fc4_clipped/fast
+
+#=============================================================================
+# Target rules for targets named cifar_keras
+
+# Build rule for target.
+cifar_keras: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 cifar_keras
+.PHONY : cifar_keras
+
+# fast build rule for target.
+cifar_keras/fast:
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/build
+.PHONY : cifar_keras/fast
+
+#=============================================================================
+# Target rules for targets named lenet_keras
+
+# Build rule for target.
+lenet_keras: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 lenet_keras
+.PHONY : lenet_keras
+
+# fast build rule for target.
+lenet_keras/fast:
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/build
+.PHONY : lenet_keras/fast
+
+#=============================================================================
+# Target rules for targets named lenet_tanh
+
+# Build rule for target.
+lenet_tanh: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 lenet_tanh
+.PHONY : lenet_tanh
+
+# fast build rule for target.
+lenet_tanh/fast:
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/build
+.PHONY : lenet_tanh/fast
+
+#=============================================================================
+# Target rules for targets named fc2_half
+
+# Build rule for target.
+fc2_half: cmake_check_build_system
+	$(MAKE) -f CMakeFiles/Makefile2 fc2_half
+.PHONY : fc2_half
+
+# fast build rule for target.
+fc2_half/fast:
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/build
+.PHONY : fc2_half/fast
+
+dnn_sources/src/cifar_keras.o: dnn_sources/src/cifar_keras.cc.o
+
+.PHONY : dnn_sources/src/cifar_keras.o
+
+# target to build an object file
+dnn_sources/src/cifar_keras.cc.o:
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.o
+.PHONY : dnn_sources/src/cifar_keras.cc.o
+
+dnn_sources/src/cifar_keras.i: dnn_sources/src/cifar_keras.cc.i
+
+.PHONY : dnn_sources/src/cifar_keras.i
+
+# target to preprocess a source file
+dnn_sources/src/cifar_keras.cc.i:
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.i
+.PHONY : dnn_sources/src/cifar_keras.cc.i
+
+dnn_sources/src/cifar_keras.s: dnn_sources/src/cifar_keras.cc.s
+
+.PHONY : dnn_sources/src/cifar_keras.s
+
+# target to generate assembly for a file
+dnn_sources/src/cifar_keras.cc.s:
+	$(MAKE) -f CMakeFiles/cifar_keras.dir/build.make CMakeFiles/cifar_keras.dir/dnn_sources/src/cifar_keras.cc.s
+.PHONY : dnn_sources/src/cifar_keras.cc.s
+
+dnn_sources/src/fc2_clipped.o: dnn_sources/src/fc2_clipped.cc.o
+
+.PHONY : dnn_sources/src/fc2_clipped.o
+
+# target to build an object file
+dnn_sources/src/fc2_clipped.cc.o:
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.o
+.PHONY : dnn_sources/src/fc2_clipped.cc.o
+
+dnn_sources/src/fc2_clipped.i: dnn_sources/src/fc2_clipped.cc.i
+
+.PHONY : dnn_sources/src/fc2_clipped.i
+
+# target to preprocess a source file
+dnn_sources/src/fc2_clipped.cc.i:
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.i
+.PHONY : dnn_sources/src/fc2_clipped.cc.i
+
+dnn_sources/src/fc2_clipped.s: dnn_sources/src/fc2_clipped.cc.s
+
+.PHONY : dnn_sources/src/fc2_clipped.s
+
+# target to generate assembly for a file
+dnn_sources/src/fc2_clipped.cc.s:
+	$(MAKE) -f CMakeFiles/fc2_clipped.dir/build.make CMakeFiles/fc2_clipped.dir/dnn_sources/src/fc2_clipped.cc.s
+.PHONY : dnn_sources/src/fc2_clipped.cc.s
+
+dnn_sources/src/fc3_clipped.o: dnn_sources/src/fc3_clipped.cc.o
+
+.PHONY : dnn_sources/src/fc3_clipped.o
+
+# target to build an object file
+dnn_sources/src/fc3_clipped.cc.o:
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.o
+.PHONY : dnn_sources/src/fc3_clipped.cc.o
+
+dnn_sources/src/fc3_clipped.i: dnn_sources/src/fc3_clipped.cc.i
+
+.PHONY : dnn_sources/src/fc3_clipped.i
+
+# target to preprocess a source file
+dnn_sources/src/fc3_clipped.cc.i:
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.i
+.PHONY : dnn_sources/src/fc3_clipped.cc.i
+
+dnn_sources/src/fc3_clipped.s: dnn_sources/src/fc3_clipped.cc.s
+
+.PHONY : dnn_sources/src/fc3_clipped.s
+
+# target to generate assembly for a file
+dnn_sources/src/fc3_clipped.cc.s:
+	$(MAKE) -f CMakeFiles/fc3_clipped.dir/build.make CMakeFiles/fc3_clipped.dir/dnn_sources/src/fc3_clipped.cc.s
+.PHONY : dnn_sources/src/fc3_clipped.cc.s
+
+dnn_sources/src/fc4_clipped.o: dnn_sources/src/fc4_clipped.cc.o
+
+.PHONY : dnn_sources/src/fc4_clipped.o
+
+# target to build an object file
+dnn_sources/src/fc4_clipped.cc.o:
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.o
+.PHONY : dnn_sources/src/fc4_clipped.cc.o
+
+dnn_sources/src/fc4_clipped.i: dnn_sources/src/fc4_clipped.cc.i
+
+.PHONY : dnn_sources/src/fc4_clipped.i
+
+# target to preprocess a source file
+dnn_sources/src/fc4_clipped.cc.i:
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.i
+.PHONY : dnn_sources/src/fc4_clipped.cc.i
+
+dnn_sources/src/fc4_clipped.s: dnn_sources/src/fc4_clipped.cc.s
+
+.PHONY : dnn_sources/src/fc4_clipped.s
+
+# target to generate assembly for a file
+dnn_sources/src/fc4_clipped.cc.s:
+	$(MAKE) -f CMakeFiles/fc4_clipped.dir/build.make CMakeFiles/fc4_clipped.dir/dnn_sources/src/fc4_clipped.cc.s
+.PHONY : dnn_sources/src/fc4_clipped.cc.s
+
+dnn_sources/src/half/fc2_half.o: dnn_sources/src/half/fc2_half.cc.o
+
+.PHONY : dnn_sources/src/half/fc2_half.o
+
+# target to build an object file
+dnn_sources/src/half/fc2_half.cc.o:
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.o
+.PHONY : dnn_sources/src/half/fc2_half.cc.o
+
+dnn_sources/src/half/fc2_half.i: dnn_sources/src/half/fc2_half.cc.i
+
+.PHONY : dnn_sources/src/half/fc2_half.i
+
+# target to preprocess a source file
+dnn_sources/src/half/fc2_half.cc.i:
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.i
+.PHONY : dnn_sources/src/half/fc2_half.cc.i
+
+dnn_sources/src/half/fc2_half.s: dnn_sources/src/half/fc2_half.cc.s
+
+.PHONY : dnn_sources/src/half/fc2_half.s
+
+# target to generate assembly for a file
+dnn_sources/src/half/fc2_half.cc.s:
+	$(MAKE) -f CMakeFiles/fc2_half.dir/build.make CMakeFiles/fc2_half.dir/dnn_sources/src/half/fc2_half.cc.s
+.PHONY : dnn_sources/src/half/fc2_half.cc.s
+
+dnn_sources/src/half/fc3_half.o: dnn_sources/src/half/fc3_half.cc.o
+
+.PHONY : dnn_sources/src/half/fc3_half.o
+
+# target to build an object file
+dnn_sources/src/half/fc3_half.cc.o:
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.o
+.PHONY : dnn_sources/src/half/fc3_half.cc.o
+
+dnn_sources/src/half/fc3_half.i: dnn_sources/src/half/fc3_half.cc.i
+
+.PHONY : dnn_sources/src/half/fc3_half.i
+
+# target to preprocess a source file
+dnn_sources/src/half/fc3_half.cc.i:
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.i
+.PHONY : dnn_sources/src/half/fc3_half.cc.i
+
+dnn_sources/src/half/fc3_half.s: dnn_sources/src/half/fc3_half.cc.s
+
+.PHONY : dnn_sources/src/half/fc3_half.s
+
+# target to generate assembly for a file
+dnn_sources/src/half/fc3_half.cc.s:
+	$(MAKE) -f CMakeFiles/fc3_half.dir/build.make CMakeFiles/fc3_half.dir/dnn_sources/src/half/fc3_half.cc.s
+.PHONY : dnn_sources/src/half/fc3_half.cc.s
+
+dnn_sources/src/half/fc4_half.o: dnn_sources/src/half/fc4_half.cc.o
+
+.PHONY : dnn_sources/src/half/fc4_half.o
+
+# target to build an object file
+dnn_sources/src/half/fc4_half.cc.o:
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.o
+.PHONY : dnn_sources/src/half/fc4_half.cc.o
+
+dnn_sources/src/half/fc4_half.i: dnn_sources/src/half/fc4_half.cc.i
+
+.PHONY : dnn_sources/src/half/fc4_half.i
+
+# target to preprocess a source file
+dnn_sources/src/half/fc4_half.cc.i:
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.i
+.PHONY : dnn_sources/src/half/fc4_half.cc.i
+
+dnn_sources/src/half/fc4_half.s: dnn_sources/src/half/fc4_half.cc.s
+
+.PHONY : dnn_sources/src/half/fc4_half.s
+
+# target to generate assembly for a file
+dnn_sources/src/half/fc4_half.cc.s:
+	$(MAKE) -f CMakeFiles/fc4_half.dir/build.make CMakeFiles/fc4_half.dir/dnn_sources/src/half/fc4_half.cc.s
+.PHONY : dnn_sources/src/half/fc4_half.cc.s
+
+dnn_sources/src/half/lenet_keras_half.o: dnn_sources/src/half/lenet_keras_half.cc.o
+
+.PHONY : dnn_sources/src/half/lenet_keras_half.o
+
+# target to build an object file
+dnn_sources/src/half/lenet_keras_half.cc.o:
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.o
+.PHONY : dnn_sources/src/half/lenet_keras_half.cc.o
+
+dnn_sources/src/half/lenet_keras_half.i: dnn_sources/src/half/lenet_keras_half.cc.i
+
+.PHONY : dnn_sources/src/half/lenet_keras_half.i
+
+# target to preprocess a source file
+dnn_sources/src/half/lenet_keras_half.cc.i:
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.i
+.PHONY : dnn_sources/src/half/lenet_keras_half.cc.i
+
+dnn_sources/src/half/lenet_keras_half.s: dnn_sources/src/half/lenet_keras_half.cc.s
+
+.PHONY : dnn_sources/src/half/lenet_keras_half.s
+
+# target to generate assembly for a file
+dnn_sources/src/half/lenet_keras_half.cc.s:
+	$(MAKE) -f CMakeFiles/lenet_keras_half.dir/build.make CMakeFiles/lenet_keras_half.dir/dnn_sources/src/half/lenet_keras_half.cc.s
+.PHONY : dnn_sources/src/half/lenet_keras_half.cc.s
+
+dnn_sources/src/half/lenet_tanh_half.o: dnn_sources/src/half/lenet_tanh_half.cc.o
+
+.PHONY : dnn_sources/src/half/lenet_tanh_half.o
+
+# target to build an object file
+dnn_sources/src/half/lenet_tanh_half.cc.o:
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.o
+.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.o
+
+dnn_sources/src/half/lenet_tanh_half.i: dnn_sources/src/half/lenet_tanh_half.cc.i
+
+.PHONY : dnn_sources/src/half/lenet_tanh_half.i
+
+# target to preprocess a source file
+dnn_sources/src/half/lenet_tanh_half.cc.i:
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.i
+.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.i
+
+dnn_sources/src/half/lenet_tanh_half.s: dnn_sources/src/half/lenet_tanh_half.cc.s
+
+.PHONY : dnn_sources/src/half/lenet_tanh_half.s
+
+# target to generate assembly for a file
+dnn_sources/src/half/lenet_tanh_half.cc.s:
+	$(MAKE) -f CMakeFiles/lenet_tanh_half.dir/build.make CMakeFiles/lenet_tanh_half.dir/dnn_sources/src/half/lenet_tanh_half.cc.s
+.PHONY : dnn_sources/src/half/lenet_tanh_half.cc.s
+
+dnn_sources/src/lenet2_tanh.o: dnn_sources/src/lenet2_tanh.cc.o
+
+.PHONY : dnn_sources/src/lenet2_tanh.o
+
+# target to build an object file
+dnn_sources/src/lenet2_tanh.cc.o:
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.o
+.PHONY : dnn_sources/src/lenet2_tanh.cc.o
+
+dnn_sources/src/lenet2_tanh.i: dnn_sources/src/lenet2_tanh.cc.i
+
+.PHONY : dnn_sources/src/lenet2_tanh.i
+
+# target to preprocess a source file
+dnn_sources/src/lenet2_tanh.cc.i:
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.i
+.PHONY : dnn_sources/src/lenet2_tanh.cc.i
+
+dnn_sources/src/lenet2_tanh.s: dnn_sources/src/lenet2_tanh.cc.s
+
+.PHONY : dnn_sources/src/lenet2_tanh.s
+
+# target to generate assembly for a file
+dnn_sources/src/lenet2_tanh.cc.s:
+	$(MAKE) -f CMakeFiles/lenet_tanh.dir/build.make CMakeFiles/lenet_tanh.dir/dnn_sources/src/lenet2_tanh.cc.s
+.PHONY : dnn_sources/src/lenet2_tanh.cc.s
+
+dnn_sources/src/lenet_keras.o: dnn_sources/src/lenet_keras.cc.o
+
+.PHONY : dnn_sources/src/lenet_keras.o
+
+# target to build an object file
+dnn_sources/src/lenet_keras.cc.o:
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.o
+.PHONY : dnn_sources/src/lenet_keras.cc.o
+
+dnn_sources/src/lenet_keras.i: dnn_sources/src/lenet_keras.cc.i
+
+.PHONY : dnn_sources/src/lenet_keras.i
+
+# target to preprocess a source file
+dnn_sources/src/lenet_keras.cc.i:
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.i
+.PHONY : dnn_sources/src/lenet_keras.cc.i
+
+dnn_sources/src/lenet_keras.s: dnn_sources/src/lenet_keras.cc.s
+
+.PHONY : dnn_sources/src/lenet_keras.s
+
+# target to generate assembly for a file
+dnn_sources/src/lenet_keras.cc.s:
+	$(MAKE) -f CMakeFiles/lenet_keras.dir/build.make CMakeFiles/lenet_keras.dir/dnn_sources/src/lenet_keras.cc.s
+.PHONY : dnn_sources/src/lenet_keras.cc.s
+
+dnn_sources/src/test_ops.o: dnn_sources/src/test_ops.cc.o
+
+.PHONY : dnn_sources/src/test_ops.o
+
+# target to build an object file
+dnn_sources/src/test_ops.cc.o:
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.o
+.PHONY : dnn_sources/src/test_ops.cc.o
+
+dnn_sources/src/test_ops.i: dnn_sources/src/test_ops.cc.i
+
+.PHONY : dnn_sources/src/test_ops.i
+
+# target to preprocess a source file
+dnn_sources/src/test_ops.cc.i:
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.i
+.PHONY : dnn_sources/src/test_ops.cc.i
+
+dnn_sources/src/test_ops.s: dnn_sources/src/test_ops.cc.s
+
+.PHONY : dnn_sources/src/test_ops.s
+
+# target to generate assembly for a file
+dnn_sources/src/test_ops.cc.s:
+	$(MAKE) -f CMakeFiles/test_ops.dir/build.make CMakeFiles/test_ops.dir/dnn_sources/src/test_ops.cc.s
+.PHONY : dnn_sources/src/test_ops.cc.s
+
+# Help Target
+help:
+	@echo "The following are some of the valid targets for this Makefile:"
+	@echo "... all (the default if no target is provided)"
+	@echo "... clean"
+	@echo "... depend"
+	@echo "... edit_cache"
+	@echo "... lenet_keras_half"
+	@echo "... lenet_tanh_half"
+	@echo "... rebuild_cache"
+	@echo "... fc4_half"
+	@echo "... fc3_clipped"
+	@echo "... fc2_clipped"
+	@echo "... test_ops"
+	@echo "... fc3_half"
+	@echo "... tensor_runtime"
+	@echo "... fc4_clipped"
+	@echo "... cifar_keras"
+	@echo "... lenet_keras"
+	@echo "... lenet_tanh"
+	@echo "... fc2_half"
+	@echo "... dnn_sources/src/cifar_keras.o"
+	@echo "... dnn_sources/src/cifar_keras.i"
+	@echo "... dnn_sources/src/cifar_keras.s"
+	@echo "... dnn_sources/src/fc2_clipped.o"
+	@echo "... dnn_sources/src/fc2_clipped.i"
+	@echo "... dnn_sources/src/fc2_clipped.s"
+	@echo "... dnn_sources/src/fc3_clipped.o"
+	@echo "... dnn_sources/src/fc3_clipped.i"
+	@echo "... dnn_sources/src/fc3_clipped.s"
+	@echo "... dnn_sources/src/fc4_clipped.o"
+	@echo "... dnn_sources/src/fc4_clipped.i"
+	@echo "... dnn_sources/src/fc4_clipped.s"
+	@echo "... dnn_sources/src/half/fc2_half.o"
+	@echo "... dnn_sources/src/half/fc2_half.i"
+	@echo "... dnn_sources/src/half/fc2_half.s"
+	@echo "... dnn_sources/src/half/fc3_half.o"
+	@echo "... dnn_sources/src/half/fc3_half.i"
+	@echo "... dnn_sources/src/half/fc3_half.s"
+	@echo "... dnn_sources/src/half/fc4_half.o"
+	@echo "... dnn_sources/src/half/fc4_half.i"
+	@echo "... dnn_sources/src/half/fc4_half.s"
+	@echo "... dnn_sources/src/half/lenet_keras_half.o"
+	@echo "... dnn_sources/src/half/lenet_keras_half.i"
+	@echo "... dnn_sources/src/half/lenet_keras_half.s"
+	@echo "... dnn_sources/src/half/lenet_tanh_half.o"
+	@echo "... dnn_sources/src/half/lenet_tanh_half.i"
+	@echo "... dnn_sources/src/half/lenet_tanh_half.s"
+	@echo "... dnn_sources/src/lenet2_tanh.o"
+	@echo "... dnn_sources/src/lenet2_tanh.i"
+	@echo "... dnn_sources/src/lenet2_tanh.s"
+	@echo "... dnn_sources/src/lenet_keras.o"
+	@echo "... dnn_sources/src/lenet_keras.i"
+	@echo "... dnn_sources/src/lenet_keras.s"
+	@echo "... dnn_sources/src/test_ops.o"
+	@echo "... dnn_sources/src/test_ops.i"
+	@echo "... dnn_sources/src/test_ops.s"
+.PHONY : help
+
+
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+	$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/llvm/projects/hpvm-tensor-rt/build/accuracy_summary b/llvm/projects/hpvm-tensor-rt/build/accuracy_summary
new file mode 100644
index 0000000000000000000000000000000000000000..f38e1bf61fbc710c979585d52d994681584f04d8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/accuracy_summary
@@ -0,0 +1,6 @@
+tensorGemm	2.000000	0.091064	0.000181	0.000000	0.017550	0.021991	0.000000	50176.000000	21609.626953	16725.333984	5575.111328
+tensorAdd	4.000000	0.125392	0.000249	0.000000	0.023913	0.029955	0.000000	64.000000	22.797260	12.800000	2.560000
+tensorClippedRelu	5.000000	0.024725	0.000057	0.000000	0.027869	0.034927	0.000000	64.000000	21.333334	10.666667	1.777778
+tensorGemm	2.000000	0.183891	0.001160	0.000000	0.017444	0.021799	0.000000	640.000000	275.632996	213.333328	71.111115
+tensorAdd	1.000000	0.152207	0.000961	0.000000	0.014370	0.018029	0.000000	5.000000	2.500000	2.500000	1.250000
+tensorClippedRelu	3.000000	0.004657	0.000077	0.000000	0.021192	0.026456	0.000000	5.000000	1.934264	1.250000	0.312500
diff --git a/llvm/projects/hpvm-tensor-rt/build/cifar_keras b/llvm/projects/hpvm-tensor-rt/build/cifar_keras
new file mode 100755
index 0000000000000000000000000000000000000000..bed9ac1acc552cd8bb3713c6a1cc39dabea69428
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/cifar_keras differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake b/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c4d523e117f78f2c5b6d4fd7c0d8fbdbd82a9011
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/cmake_install.cmake
@@ -0,0 +1,44 @@
+# Install script for directory: /home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt
+
+# Set the install prefix
+if(NOT DEFINED CMAKE_INSTALL_PREFIX)
+  set(CMAKE_INSTALL_PREFIX "/usr/local")
+endif()
+string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+  if(BUILD_TYPE)
+    string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+           CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+  else()
+    set(CMAKE_INSTALL_CONFIG_NAME "")
+  endif()
+  message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+endif()
+
+# Set the component getting installed.
+if(NOT CMAKE_INSTALL_COMPONENT)
+  if(COMPONENT)
+    message(STATUS "Install component: \"${COMPONENT}\"")
+    set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+  else()
+    set(CMAKE_INSTALL_COMPONENT)
+  endif()
+endif()
+
+# Install shared libraries without execute permission?
+if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+  set(CMAKE_INSTALL_SO_NO_EXE "1")
+endif()
+
+if(CMAKE_INSTALL_COMPONENT)
+  set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
+else()
+  set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
+endif()
+
+string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
+       "${CMAKE_INSTALL_MANIFEST_FILES}")
+file(WRITE "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build/${CMAKE_INSTALL_MANIFEST}"
+     "${CMAKE_INSTALL_MANIFEST_CONTENT}")
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc2_clipped b/llvm/projects/hpvm-tensor-rt/build/fc2_clipped
new file mode 100755
index 0000000000000000000000000000000000000000..de064ca7ccf662dee5f507f3c874a805cddad39d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc2_clipped differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc2_half b/llvm/projects/hpvm-tensor-rt/build/fc2_half
new file mode 100755
index 0000000000000000000000000000000000000000..228adf76eca4bdf690d6521f1212f97784fe1e11
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc2_half differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc3_clipped b/llvm/projects/hpvm-tensor-rt/build/fc3_clipped
new file mode 100755
index 0000000000000000000000000000000000000000..d2c498c0dca637923eebbe9818bebc0ce66152da
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc3_clipped differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc3_half b/llvm/projects/hpvm-tensor-rt/build/fc3_half
new file mode 100755
index 0000000000000000000000000000000000000000..c45d7357018663c501efc2ed9abf75bbfc4a423c
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc3_half differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc4_clipped b/llvm/projects/hpvm-tensor-rt/build/fc4_clipped
new file mode 100755
index 0000000000000000000000000000000000000000..ea5ac5b3b9429c2b2d5ad8eb3dcb155047dfb460
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc4_clipped differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/fc4_half b/llvm/projects/hpvm-tensor-rt/build/fc4_half
new file mode 100755
index 0000000000000000000000000000000000000000..0c7951e0f363bda143b683ec0cffc0b3516c5b41
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/fc4_half differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/final_accuracy b/llvm/projects/hpvm-tensor-rt/build/final_accuracy
new file mode 100644
index 0000000000000000000000000000000000000000..5dc9446b78c496341f0dbd837720d59340012c6f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/final_accuracy
@@ -0,0 +1 @@
+98.699997
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_keras b/llvm/projects/hpvm-tensor-rt/build/lenet_keras
new file mode 100755
index 0000000000000000000000000000000000000000..6b4a876cf5379aeeeb193a9d76ba85fd1ebf5a13
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_keras differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half b/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half
new file mode 100755
index 0000000000000000000000000000000000000000..e073bb378e153b36a0108d949b29c1cad48f8050
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_keras_half differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_tanh b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh
new file mode 100755
index 0000000000000000000000000000000000000000..fcc2dcbe14b70f40034324ca889e029152893c5d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half
new file mode 100755
index 0000000000000000000000000000000000000000..f67f83391dd5c30a7ac8b3f3f363e287bfbe2ffb
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/lenet_tanh_half differ
diff --git a/llvm/projects/hpvm-tensor-rt/build/opentuner_flags b/llvm/projects/hpvm-tensor-rt/build/opentuner_flags
new file mode 100644
index 0000000000000000000000000000000000000000..27c9ea9893123efc7982145e8c3423596d3cb75c
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/opentuner_flags
@@ -0,0 +1,14 @@
+2
+4
+5
+2
+1
+3
+7
+2
+4
+1
+3
+7
+7
+1
diff --git a/llvm/projects/hpvm-tensor-rt/build/profile_data.txt b/llvm/projects/hpvm-tensor-rt/build/profile_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..76fa1fd253668213c3fb5f206fc9a2b50fc08ce6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/build/profile_data.txt
@@ -0,0 +1,30 @@
+tensorConv1	1543644372.403396
+tensorConv_end1	1543644372.406054	0.002658
+tensorAdd1	1543644372.406065
+tensorAdd_end1	1543644372.408212	0.002148
+tensorPooling1	1543644372.408217
+tensorPooling_end1	1543644372.408522	0.000305
+tensorTanh1	1543644372.408527
+tensorTanh_end1	1543644372.408551	0.000023
+tensorConv2	1543644372.408554
+tensorConv_end2	1543644372.414943	0.006389
+tensorAdd2	1543644372.414948
+tensorAdd_end2	1543644372.427007	0.012059
+tensorPooling2	1543644372.427010
+tensorPooling_end2	1543644372.427250	0.000240
+tensorTanh2	1543644372.427255
+tensorTanh_end2	1543644372.427266	0.000010
+tensorGemmGPU1	1543644372.427270
+tensorGemmGPU_end1	1543644372.432362	0.005091
+tensorAdd3	1543644372.432367
+tensorAdd_end3	1543644372.435863	0.003496
+tensorTanh3	1543644372.435866
+tensorTanh_end3	1543644372.435877	0.000010
+tensorGemmGPU2	1543644372.435880
+tensorGemmGPU_end2	1543644372.436120	0.000240
+tensorAdd4	1543644372.436124
+tensorAdd_end4	1543644372.436202	0.000078
+tensorTanh4	1543644372.436205
+tensorTanh_end4	1543644372.436215	0.000009
+tensorSoftmax1	1543644372.436220
+tensorSoftmax_end1	1543644372.436362	0.000142
diff --git a/llvm/projects/hpvm-tensor-rt/build/test_ops b/llvm/projects/hpvm-tensor-rt/build/test_ops
new file mode 100755
index 0000000000000000000000000000000000000000..af87a88704c7eb268f3e5fc2fdcb003f4439e9fa
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/build/test_ops differ
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h
new file mode 100644
index 0000000000000000000000000000000000000000..4eaf88e6d613c51a5a75ef8ce73b55a3410f1dbd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/op_overheads.h
@@ -0,0 +1,148 @@
+
+
+#ifndef OP_OVERHEADS_HEADER
+#define OP_OVERHEADS_HEADER
+
+
+#include <sstream>
+#include "../../tensor_runtime/include/tensor.h"
+#include "types.h"
+
+
+float scale_down_factor = 10000.0; // divisor applied to raw computation counts in getScaledComps
+float error_factor = 0.1; // weight of error_scale in the extra divisor (1.0 + error_factor * error_scale)
+std::string result_str = ""; // text accumulated by addNormToResult/addCompsToResult and written out by dump_result
+
+
+// TODO: Every routine needs testing
+
+
+// private function
+static float getScaledComps(double total_comps, int error_scale){
+  // Shrinks the raw count by scale_down_factor, then by a divisor that grows with error_scale.
+  const float error_divisor = 1.0 + (error_factor * error_scale);
+  const double scaled_down = total_comps / scale_down_factor;
+
+  // The double quotient is narrowed to float by the return type.
+  return scaled_down / error_divisor;
+}
+
+
+static void addNormToResult(float comps){
+  // Appends comps in fixed-point notation to result_str, followed by a tab.
+  std::ostringstream formatted;
+  formatted << std::fixed << comps;
+  const std::string text = formatted.str();
+  result_str += text;
+  result_str += "\t";
+}
+
+
+
+static void addCompsToResult(float comps){
+  // Appends comps in fixed-point notation to result_str, followed by a newline.
+  std::ostringstream formatted;
+  formatted << std::fixed << comps;
+  const std::string text = formatted.str();
+  result_str += text;
+  result_str += "\n";
+}
+
+
+void add_conv_overheads(void* input_ptr, void* filter_ptr,
+			int strideA, int strideB, int error_scale){
+  // Placeholder: only casts its arguments; nothing is computed or appended to result_str.
+  Tensor* input = (Tensor*) input_ptr;
+  Tensor* filter = (Tensor*) filter_ptr;
+  
+}
+
+
+void add_gemm_overheads(void* lhs_ptr, void* rhs_ptr, int error_scale){
+  // Appends the scaled multiply count (m * n * k) of the lhs x rhs product to result_str; aborts if the inner dimensions differ.
+  Tensor* lhs = (Tensor*) lhs_ptr;
+  Tensor* rhs = (Tensor*) rhs_ptr;
+
+  int m = lhs->dims.dim_sizes[0];
+  // The rhs last dimension must contain the neurons
+  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
+  int k = 1;
+
+  // Flattening the dimensions after the batch dimension
+  for (int j = 1 ; j < lhs->dims.num_dims; j++){
+    k = k * lhs->dims.dim_sizes[j]; // input neurons
+  }
+
+  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
+  // Dimension-note: Check if k is same across the two tensors
+  printf("m = %d, n = %d, k = %d \n", m, n, k);
+
+  if(rhs_k != k){
+    printf("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
+    abort();
+  }
+
+  // Widen to double before multiplying: the int product m * n * rhs_k can overflow for large batches.
+  double total_comps = (double) m * n * rhs_k;
+  float scaled_comps = getScaledComps(total_comps, error_scale);
+
+  printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n",
+         error_scale, total_comps, scaled_comps);
+
+  addCompsToResult(scaled_comps);
+}
+
+
+void add_bias_overheads(void* input_ptr, int error_scale){
+  // Records the scaled cost of a bias add, counting one computation per tensor element.
+  Tensor* bias_input = static_cast<Tensor*>(input_ptr);
+
+  // The element count is converted to double before scaling.
+  const double raw_comps = bias_input->num_elems;
+  const float scaled = getScaledComps(raw_comps, error_scale);
+
+  printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n",
+         error_scale, raw_comps, scaled);
+
+  addCompsToResult(scaled);
+}
+
+
+void add_relu_overheads(void* input_ptr, int error_scale){
+  // Records the scaled cost of a ReLU, counting one computation per tensor element.
+  Tensor* relu_input = static_cast<Tensor*>(input_ptr);
+
+  // The element count is converted to double before scaling.
+  const double raw_comps = relu_input->num_elems;
+  const float scaled = getScaledComps(raw_comps, error_scale);
+
+  printf("error_scale = %d, total_comps = %f, scaled_comps = %f \n",
+         error_scale, raw_comps, scaled);
+
+  addCompsToResult(scaled);
+}
+
+float add_pool_overheads(void* input_ptr, int kernel_size,
+                         int stride_size, int error_scale){
+  return 0.0f; // not implemented: nothing is recorded; the explicit return avoids falling off the end of a non-void function
+}
+
+
+void add_norms(void* norms_ptr){
+  // Appends the l1, l2 and inf norm fields, in that order, via addNormToResult.
+  const Norm_t& norm_vals = *static_cast<Norm_t*>(norms_ptr);
+
+  addNormToResult(norm_vals.l1_norm);
+  addNormToResult(norm_vals.l2_norm);
+  addNormToResult(norm_vals.inf_norm);
+
+}
+
+void dump_result(char* file_name){ // writes result_str to file_name, replacing any previous contents
+  if(FILE* fp = fopen(file_name, "w+")){ // skip the write when the file cannot be opened instead of passing NULL to fwrite
+    fwrite(result_str.c_str(), 1, result_str.length(), fp);
+    fclose(fp);
+  }
+}
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
new file mode 100644
index 0000000000000000000000000000000000000000..3479a94abec9d6357edc26e4507ec80f8b060acb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/types.h
@@ -0,0 +1,38 @@
+
+#ifndef TYPES_HEADER
+#define TYPES_HEADER
+
+
+struct Dimension_t{ // dimension descriptor embedded in Tensor_t as its dims field
+  int num_dims; // presumably the number of entries in dim_sizes -- TODO confirm
+  size_t* dim_sizes; // presumably one size per dimension -- TODO confirm who allocates and frees it
+};
+
+
+struct Tensor_t{ // plain descriptor of a tensor: ids, type/format codes, data pointer and sizes
+  int tensor_id; // used for indexing (in the tensor runtime)
+  int data_type; // {float_type, double_type, half_type, int_type}
+  int data_format; // {nchw, nhwc}
+  void* host_data; // untyped data pointer; presumably host-side storage -- TODO confirm
+  size_t num_elems; // Total elements
+  size_t size_in_bytes; // Total size in bytes
+  struct Dimension_t dims; // dimension count and per-dimension sizes
+};
+
+
+enum Tensor_type_t{ // element type codes; the Tensor_t::data_type comment lists these same names
+  float_type, // 0
+  double_type, // 1
+  half_type, // 2
+  int_type // 3
+};
+
+
+// NOTE: Currently only NCHW is supported due to limited cuDNN support
+enum Tensor_format_t{ // layout codes; the Tensor_t::data_format comment lists these same names
+  nchw, // 0
+  nhwc // 1
+};
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbbdff5e363cca9d82f065a3972368632cfef726
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
@@ -0,0 +1,349 @@
+
+// Header guards
+#ifndef UTILS_HEADER
+#define UTILS_HEADER
+
+
+#include <sstream>
+#include "../../tensor_runtime/include/tensor.h"
+#include "types.h"
+
+
+void printTensorInfo(void* tensor_ptr){
+  // Prints the dimension count, the first two dimension sizes and the element count (reads dim_sizes[0] and [1] unconditionally).
+  struct Tensor* tensor = (struct Tensor*) tensor_ptr;
+
+  if(tensor->gpu_data != NULL){
+    printf("Successful cudaMalloc \n");
+  }
+
+  printf("tensor dims = %d \n", tensor->dims.num_dims);
+  printf("dim1_size = %zu \n", (size_t) tensor->dims.dim_sizes[0]); // cast + %zu keep the format and argument type in agreement
+  printf("dim2_size = %zu \n", (size_t) tensor->dims.dim_sizes[1]);
+  printf("num_elems = %zu \n", (size_t) tensor->num_elems);
+}
+
+
+// FIXIT: Move this to debug.h and include in all files
+void dumpWeightsToFile(char* file_name, void* weights_ptr){
+  // Writes size_in_bytes bytes of the tensor's host_data to file_name; aborts if the file cannot be created.
+  struct Tensor* weights = (Tensor*) weights_ptr;
+
+  // Move data back to host
+  hpvm_request_tensor(weights, 0);
+
+  FILE* fp = fopen(file_name, "wb");
+  if(fp == NULL){
+    printf("File %s could not be created. Check if directory exists \n", file_name);
+    abort();
+  }
+
+  printf("size_in_bytes = %zu \n", (size_t) weights->size_in_bytes); // %zu matches size_t; %d did not
+  size_t bytes_written = fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
+  printf("bytes_written = %zu \n", bytes_written);
+  fclose(fp);
+}
+
+
+
+void fillTensorWithOnes(void* tensor_ptr){
+  // Sets every element of a CUDNN_DATA_FLOAT tensor's host_data to 1.0; other data types are left untouched.
+  struct Tensor* target = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(target, 0);
+
+  // Only the float representation is handled here.
+  if(target->data_type != CUDNN_DATA_FLOAT) return;
+  float* values = (float*) target->host_data;
+  unsigned int idx = 0;
+  while(idx < target->num_elems){
+    values[idx++] = 1.0;
+  }
+}
+
+
+void fillWithOnesAndTwos(void* tensor_ptr){
+  // Fills the first num_elems/2 elements of a float tensor with 1.0 and the remaining ones with 2.0.
+  struct Tensor* target = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(target, 0);
+
+  // Only the float representation is handled here.
+  if(target->data_type != CUDNN_DATA_FLOAT) return;
+
+  float* values = (float*) target->host_data;
+  unsigned int idx = 0;
+  for(; idx < target->num_elems/2; idx++){
+    values[idx] = 1.0;
+  }
+
+  for(idx = target->num_elems/2; idx < target->num_elems; idx++){
+    values[idx] = 2.0;
+  }
+}
+
+
+void fillTensorWithNegOnes(void* tensor_ptr){
+  // Sets every element of a CUDNN_DATA_FLOAT tensor's host_data to -1.0; other data types are left untouched.
+  struct Tensor* target = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(target, 0);
+
+  // Only the float representation is handled here.
+  if(target->data_type != CUDNN_DATA_FLOAT) return;
+  float* values = (float*) target->host_data;
+  unsigned int idx = 0;
+  while(idx < target->num_elems){
+    values[idx++] = -1.0;
+  }
+}
+
+
+void fillTensorVals(void* tensor_ptr){
+  // Writes 1, 2, 3, ... into the host_data of a float tensor; other data types are left untouched.
+  struct Tensor* target = (struct Tensor*) tensor_ptr;
+  // NOTE(review): no hpvm_request_tensor call is made before touching host_data here -- confirm this is intended.
+  if(target->data_type != CUDNN_DATA_FLOAT) return;
+
+  float* values = (float*) target->host_data;
+  for(unsigned int idx = 0; idx < target->num_elems; idx++){
+    values[idx] = idx + 1;
+  }
+}
+
+
+void printTensorValues(void* tensor_ptr){
+  // Prints every element of a float tensor as "%f," and always finishes with a newline.
+  struct Tensor* source = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(source, 0);
+
+  // Values are only printed for the float representation.
+  const bool is_float = (source->data_type == CUDNN_DATA_FLOAT);
+  float* values = (float*) source->host_data;
+  for(unsigned int idx = 0; is_float && idx < source->num_elems; idx++){
+    printf("%f,", values[idx]);
+  }
+
+  // The terminating newline is emitted for every data type.
+  printf("\n");
+}
+
+
+void printTensorDims(void* tensor_ptr){
+  // Prints the element count followed by the size of every dimension.
+  struct Tensor* tensor = (struct Tensor*) tensor_ptr;
+
+  printf("Num_elems = %zu \n", (size_t) tensor->num_elems); // cast + %zu keep the format and argument type in agreement
+  for (int i = 0; i < tensor->dims.num_dims; i++){
+    printf("dim[%d] = %zu \n", i, (size_t) tensor->dims.dim_sizes[i]);
+  }
+}
+
+
+
+void compareTensors(void* tensor1_ptr, void* tensor2_ptr){
+  // Aborts unless both tensors hold the same number of elements and identical float values in host_data.
+  struct Tensor* tensor1 = (struct Tensor*) tensor1_ptr;
+  struct Tensor* tensor2 = (struct Tensor*) tensor2_ptr;
+  if(tensor1->num_elems != tensor2->num_elems){ // otherwise the loop below could read past the end of tensor2
+    printf("Tensor size mismatch \n");
+    abort();
+  }
+  hpvm_request_tensor(tensor1, 0);
+  hpvm_request_tensor(tensor2, 0);
+  float* tensor_data1 = (float*) tensor1->host_data;
+  float* tensor_data2 = (float*) tensor2->host_data;
+  for(unsigned int i = 0; i < tensor1->num_elems; i++){
+    if(tensor_data1[i] == tensor_data2[i]) continue;
+    printf("Tensor data mismatch at index %u \n", i); // %u matches the unsigned index
+    abort();
+  }
+}
+
+
+
+void compareValues(void* tensor_ptr, float* data, size_t num_elems){
+  // Aborts if any of the first num_elems floats in the tensor's host_data differs from data.
+  struct Tensor* actual = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(actual, 0);
+
+  float* actual_vals = (float*) actual->host_data;
+  // Exact float equality is required; the first difference ends the process.
+  for(unsigned int idx = 0; idx < num_elems; idx++){
+    if(actual_vals[idx] == data[idx]) continue;
+    printf("Tensor data mismatch");
+    abort();
+  }
+}
+
+
+void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size,
+                      int dim3_size, int dim4_size){
+  // Reads one byte per element from file_name after a 16-byte header, scales each byte by 1/255 and returns a new NCHW tensor.
+  int type_size = 4; // NOTE: Assuming floating point tensors
+  int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
+  int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size;
+  uint8_t* file_data = (uint8_t*) malloc(sizeof(char) * num_elems);
+  float* tensor_data = (float*) malloc(sizeof(float) * num_elems);
+  int file_header_size = 16;
+
+  FILE* file = fopen(file_name, "rb");
+  if(file == NULL){
+    printf("Data file %s is not found. Aborting... \n", file_name);
+    abort();
+  }
+
+  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
+  size_t bytes_read = fread(file_data, 1, sizeof(uint8_t) * num_elems, file);
+  fclose(file); // the handle is not needed after the single read
+  if(bytes_read != (size_t) num_elems){ // a short read would leave uninitialized bytes in file_data
+    printf("Data file %s is shorter than expected. Aborting... \n", file_name);
+    abort();
+  }
+  for (int i = 0; i < num_elems; ++i){
+    tensor_data[i] = (float) file_data[i] / 255.0f;
+  }
+  free(file_data); // raw bytes are not used again once converted
+  if(num_elems > 10) printf("tensor_data[%d] = %f \n", 10, tensor_data[10]); // guarded: index 10 may not exist
+  // NOTE: Using NCHW format
+  struct Tensor* input = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size,
+                                                         dim3_size, dim4_size);
+  initTensorData(input, tensor_data, size_in_bytes);
+  compareValues(input, tensor_data, num_elems);
+  return input;
+}
+
+
+
+struct Tensor* readTrainedWeights(char* file_name, int data_type, int dim1_size, int dim2_size,
+                                  int dim3_size, int dim4_size){
+  // Reads 4 bytes per element from the start of file_name and returns them as a new NCHW tensor; aborts on a missing or short file.
+  // FIXIT: Don't assume floating point types
+  int type_size = 4; // NOTE: Assuming floating point tensors
+  int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
+  int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size;
+  float* tensor_data = (float*) malloc(sizeof(float) * num_elems);
+  int file_header_size = 0;
+
+  FILE* file = fopen(file_name, "rb");
+  if(file == NULL){
+    printf("Data file %s is not found. Aborting... \n", file_name);
+    abort();
+  }
+
+  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+  fclose(file); // the handle is not needed after the single read
+  if(bytes_read != (size_t) size_in_bytes){ // a short read would leave uninitialized floats in tensor_data
+    printf("Data file %s is shorter than expected. Aborting... \n", file_name);
+    abort();
+  }
+  struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size,
+                                                           dim3_size, dim4_size);
+  initTensorData(weights, tensor_data, size_in_bytes);
+  compareValues(weights, tensor_data, num_elems);
+  return weights;
+}
+
+
+uint8_t* readLabels(char* labels_file, int num_labels){
+  // Returns a malloc'd buffer with up to num_labels bytes read from labels_file after an 8-byte header.
+  int file_header_size = 8;
+  uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels);
+  FILE* file = fopen(labels_file, "rb");
+  if(file == NULL){
+    printf("Data file %s is not found. Aborting...\n", labels_file);
+    abort();
+  }
+  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
+  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
+  fclose(file); // the handle is not needed after the single read
+  printf("--labels bytes_read = %zu \n", bytes_read); // %zu matches size_t; %d did not
+  return labels;
+}
+
+
+
+void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){
+  // Compares each row's argmax with the labels from labels_file, prints the accuracy (%) and writes it to "final_accuracy".
+  // NOTE(review): rows are indexed up to dim_sizes[0]; presumably num_labels is at least that large -- confirm with callers.
+  struct Tensor* result = (struct Tensor*) result_ptr;
+
+  uint8_t* labels = readLabels(labels_file, num_labels);
+  size_t batch_dim = result->dims.dim_sizes[0];
+  size_t channels = result->dims.dim_sizes[1];
+  float* data = (float*) result->host_data;
+  int num_errors = 0;
+
+  for(size_t i = 0; i < batch_dim; i++){
+    size_t chosen = 0;
+    // Scan all scores of the row; a fixed bound of 10 would read the next row when channels < 10.
+    for (size_t id = 1; id < channels; ++id){
+      if (data[i * channels + chosen] < data[i * channels + id]) chosen = id;
+    }
+
+    if(chosen != labels[i])
+      num_errors++;
+  }
+  free(labels); // buffer malloc'd by readLabels is not used past this point
+
+  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
+  printf("****** Accuracy = %f \n\n", accuracy);
+
+  FILE* fp = fopen("final_accuracy", "w+");
+  if(fp != NULL){
+    std::ostringstream ss;
+    ss << std::fixed << accuracy;
+    std::string print_str = ss.str();
+
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fclose(fp);
+  }
+
+}
+
+
+
+
+void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr){
+  // Compares each row's argmax with the caller-supplied labels, prints the accuracy (%) and writes it to "final_accuracy".
+  // NOTE(review): num_labels is not consulted; rows are indexed up to dim_sizes[0] -- confirm labels is at least that long.
+  struct Tensor* result = (struct Tensor*) result_ptr;
+
+  size_t batch_dim = result->dims.dim_sizes[0];
+  size_t channels = result->dims.dim_sizes[1];
+  float* data = (float*) result->host_data;
+  int num_errors = 0;
+
+  for(size_t i = 0; i < batch_dim; i++){
+    size_t chosen = 0;
+    // Scan all scores of the row; a fixed bound of 10 would read the next row when channels < 10.
+    for (size_t id = 1; id < channels; ++id){
+      if (data[i * channels + chosen] < data[i * channels + id]) chosen = id;
+    }
+
+    if(chosen != labels[i])
+      num_errors++;
+  }
+
+  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
+  printf("****** Accuracy = %f \n\n", accuracy);
+
+
+  FILE* fp = fopen("final_accuracy", "w+");
+  if(fp != NULL){
+
+    std::ostringstream ss;
+    ss << std::fixed << accuracy;
+    std::string print_str = ss.str();
+
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fclose(fp);
+  }
+
+}
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc#
new file mode 100644
index 0000000000000000000000000000000000000000..ec409aaa42b6bd11981705764bc6af25c0fb9d01
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#fc4_clipped.cc#
@@ -0,0 +1,156 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+bool Opentuner_run = false; // set to true by main() when any command-line argument is passed
+
+
+// Runs inference for a 4-layer fully connected network that applies
+// tensorRelu2(x, 0, 2) after every layer, and reports accuracy after each run.
+// When Opentuner_run is set, every run is bracketed by a handshake on
+// /tmp/myfifo: a command is read before the run ("stop_run" aborts the
+// process) and a completion record is written after it.
+void test_4_Layer_clipped_FC(){
+
+  int total_runs = 200;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* 4-Layer FC with clipped activations and weights ********* \n");
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 512);
+  void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin",
+                                      float_type, 1, 512, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin",
+                                         float_type, 1, 1, 512, 256);
+  void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin",
+                                      float_type, 1, 256, 1, 1);
+  void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin",
+                                         float_type, 1, 1, 256, 128);
+  void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Read the next command from the OpenTuner pipe before starting the run.
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+
+      // Zero-fill the buffer so the command is NUL-terminated for strcmp()
+      // even when read() delivers fewer than 80 bytes or fails.
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+      close(fd);
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorGemmGPU(input, fc1_weights);
+
+    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
+
+    // Layer-3
+    void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);
+
+    void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
+
+    void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2);
+
+    // Layer-4
+    void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights);
+
+    void* fc4_bias_out = tensorAdd(fc4out, fc4_bias);
+
+    void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc4_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Send a fixed 80-byte record, zero-padded after the message text.
+      char str[80] = {0};
+      strcpy(str, "completed***!\n");
+      write(fd_out, str, 80);
+      close(fd_out);
+    }
+  }
+}
+
+
+
+// Entry point. Passing any command-line argument switches the test into
+// OpenTuner mode (Opentuner_run); the argument's value itself is not read.
+int main(int argc, char* argv[]){
+
+  if(argc > 1){
+    Opentuner_run = true;
+  }
+
+  // The runtime has to be initialized before any other tensor call.
+  llvm_hpvm_initTensorRt(0);
+  test_4_Layer_clipped_FC();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc#
new file mode 100644
index 0000000000000000000000000000000000000000..6da5842ef83bb76b83e36e69536731c3d709fefa
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network.cc#
@@ -0,0 +1,146 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+
+// Two-layer fully connected network evaluated with the CPU GEMM routine
+// (tensorGemmCPU). Tensor dimensions are printed after every op, and the
+// softmax output is scored against the t10k label file.
+void testFCNetworkArchCPU(){
+
+  printf("********* Fully Connected DNN-1 ********* \n");
+
+  int batch = 10000;
+
+  // Input batch
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, batch, 1, 28, 28);
+
+  // Layer-1 parameters
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+
+  // Layer-2 parameters
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  printTensorDims(images);
+  printTensorDims(w1);
+
+  // Tensor-op profiling starts here
+  startProfiling();
+
+  // Layer-1: GEMM, bias add, ReLU
+  void* l1_gemm = tensorGemmCPU(images, w1);
+  printTensorDims(l1_gemm);
+
+  void* l1_sum = tensorAdd(l1_gemm, b1);
+  printTensorDims(l1_sum);
+
+  void* l1_act = tensorRelu(l1_sum);
+  printTensorDims(l1_act);
+
+  // Layer-2: GEMM, bias add, ReLU
+  void* l2_gemm = tensorGemmCPU(l1_act, w2);
+  printTensorDims(l2_gemm);
+
+  void* l2_sum = tensorAdd(l2_gemm, b2);
+  printTensorDims(l2_sum);
+
+  void* l2_act = tensorRelu(l2_sum);
+  printTensorDims(l2_act);
+
+  // Softmax over the layer-2 activations
+  void* probs = tensorSoftmax(l2_act);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  // Author's open question: the accuracy comparison may not need to be
+  // part of the HPVM graph.
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  batch, probs);
+}
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+// Two-layer fully connected network evaluated with the GPU GEMM routine
+// (tensorGemmGPU). Dimensions of each intermediate tensor are printed, and
+// the softmax output is scored against the t10k label file.
+void testFCNetworkArchGPU(){
+
+  printf("********* Fully Connected DNN-1 ********* \n");
+
+  int batch = 10000;
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, batch, 1, 28, 28);
+
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // Tensor-op profiling starts here
+  startProfiling();
+
+  // Layer-1: GEMM, bias add, ReLU
+  void* l1_gemm = tensorGemmGPU(images, w1);
+  printTensorDims(l1_gemm);
+
+  void* l1_sum = tensorAdd(l1_gemm, b1);
+  printTensorDims(l1_sum);
+
+  void* l1_act = tensorRelu(l1_sum);
+  printTensorDims(l1_act);
+
+  // Layer-2: GEMM, bias add, ReLU
+  void* l2_gemm = tensorGemmGPU(l1_act, w2);
+  printTensorDims(l2_gemm);
+
+  void* l2_sum = tensorAdd(l2_gemm, b2);
+  printTensorDims(l2_sum);
+
+  void* l2_act = tensorRelu(l2_sum);
+  printTensorDims(l2_act);
+
+  // Softmax over the layer-2 activations
+  void* probs = tensorSoftmax(l2_act);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  // Author's open question: the accuracy comparison may not need to be
+  // part of the HPVM graph.
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  batch, probs);
+}
+
+
+
+// Entry point: brings up the tensor runtime, then runs the GPU variant of the
+// network. The CPU variant (testFCNetworkArchCPU) is not invoked.
+int main(){
+
+  // Runtime setup has to precede every other tensor call.
+  initializeRuntime(0);
+
+  testFCNetworkArchGPU();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc# b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc#
new file mode 100644
index 0000000000000000000000000000000000000000..88ceb9105059aeb2eca8f18029af674ea8b14a10
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/#test_fc_network2.cc#
@@ -0,0 +1,92 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+// Four-layer fully connected network (GEMM + bias add per layer, no
+// activation between layers) followed by softmax. Runs on the GPU GEMM path,
+// prints every intermediate tensor's dimensions and then scores the result
+// against the t10k label file.
+void test4LayerFC(){
+
+  printf("********* 4-layer FC Network ********* \n");
+
+  int batch = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 float_type, batch, 1, 28, 28);
+
+  void* w1 = readTrainedWeights("../model_params/FC_network1/fc1.bin",
+                                float_type, 1, 1, 784, 1000);
+  void* b1 = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin",
+                                float_type, 1, 1000, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network1/fc2.bin",
+                                float_type, 1, 1, 1000, 500);
+  void* b2 = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin",
+                                float_type, 1, 500, 1, 1);
+  void* w3 = readTrainedWeights("../model_params/FC_network1/fc3.bin",
+                                float_type, 1, 1, 500, 200);
+  void* b3 = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin",
+                                float_type, 1, 200, 1, 1);
+  void* w4 = readTrainedWeights("../model_params/FC_network1/fc4.bin",
+                                float_type, 1, 1, 200, 10);
+  void* b4 = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // Tensor-op profiling starts here
+  startProfiling();
+
+  // Layer-1
+  void* l1_gemm = tensorGemmGPU(images, w1);
+  printTensorDims(l1_gemm);
+
+  void* l1_sum = tensorAdd(l1_gemm, b1);
+  printTensorDims(l1_sum);
+
+  // Layer-2
+  void* l2_gemm = tensorGemmGPU(l1_sum, w2);
+  printTensorDims(l2_gemm);
+
+  void* l2_sum = tensorAdd(l2_gemm, b2);
+  printTensorDims(l2_sum);
+
+  // Layer-3
+  void* l3_gemm = tensorGemmGPU(l2_sum, w3);
+  printTensorDims(l3_gemm);
+
+  void* l3_sum = tensorAdd(l3_gemm, b3);
+  printTensorDims(l3_sum);
+
+  // Layer-4
+  void* l4_gemm = tensorGemmGPU(l3_sum, w4);
+  printTensorDims(l4_gemm);
+
+  void* l4_sum = tensorAdd(l4_gemm, b4);
+  printTensorDims(l4_sum);
+
+  // Softmax over the layer-4 output
+  void* probs = tensorSoftmax(l4_sum);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  batch, probs);
+}
+
+
+
+
+// Entry point: sets up the tensor runtime and runs the 4-layer FC test.
+int main(){
+
+  initializeRuntime(0);
+  test4LayerFC();
+
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc
new file mode 120000
index 0000000000000000000000000000000000000000..b1157c7fa0837118c14ccca2c475020504ef12a8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/.#fc4_clipped.cc
@@ -0,0 +1 @@
+hsharif3@tyler.cs.illinois.edu.32106:1541049775
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c746e5de6116f701df7370f93969d40486e04e90
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/cifar_keras.cc
@@ -0,0 +1,203 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+bool Opentuner_run = false; // set to true by main() when any command-line argument is passed
+
+
+/* NOTE: Reference Architecture to use for profiling */
+// Runs inference for the CIFAR-10 network: four convolution layers with tanh
+// activations (pooling after layers 2-4), then two fully connected layers
+// with tanh and a softmax; accuracy is reported after each run.
+// When Opentuner_run is set, every run is bracketed by a handshake on
+// /tmp/myfifo: a command is read before the run ("stop_run" aborts the
+// process) and a completion record is written after it.
+void testCifarNet(){
+
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* CIFAR-10 DNN ********** \n");
+
+  int test_batch_size = 1000;
+
+  // Load the labels, the input batch and the trained parameters.
+  uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/cifar_keras/input.bin",
+                                   float_type,
+                                   test_batch_size, 3, 32, 32);
+
+  // Convolution filters and biases
+  void* conv1_filter = readTrainedWeights("../model_params/cifar_keras/conv1.bin",
+                                          float_type, 32, 3, 3, 3);
+  void* conv1_bias = readTrainedWeights("../model_params/cifar_keras/conv1_bias.bin",
+                                        float_type, 1, 32, 1, 1);
+  void* conv2_filter = readTrainedWeights("../model_params/cifar_keras/conv2.bin",
+                                          float_type, 64, 32, 3, 3);
+  void* conv2_bias = readTrainedWeights("../model_params/cifar_keras/conv2_bias.bin",
+                                        float_type, 1, 64, 1, 1);
+
+  void* conv3_filter = readTrainedWeights("../model_params/cifar_keras/conv3.bin",
+                                          float_type, 128, 64, 3, 3);
+  void* conv3_bias = readTrainedWeights("../model_params/cifar_keras/conv3_bias.bin",
+                                        float_type, 1, 128, 1, 1);
+  void* conv4_filter = readTrainedWeights("../model_params/cifar_keras/conv4.bin",
+                                          float_type, 128, 128, 3, 3);
+  void* conv4_bias = readTrainedWeights("../model_params/cifar_keras/conv4_bias.bin",
+                                        float_type, 1, 128, 1, 1);
+
+  // Fully connected weights and biases
+  void* fc1_weights = readTrainedWeights("../model_params/cifar_keras/fc1.bin",
+                                         float_type, 1, 1, 2048, 1024);
+  void* fc1_bias = readTrainedWeights("../model_params/cifar_keras/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/cifar_keras/fc2.bin",
+                                         float_type, 1, 1, 1024, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/cifar_keras/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Read the next command from the OpenTuner pipe before starting the run.
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+
+      // Zero-fill the buffer so the command is NUL-terminated for strcmp()
+      // even when read() delivers fewer than 80 bytes or fails.
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start power and performance profiling
+    startProfiling();
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // 1st Layer
+    // NOTE: 'SAME' convolution
+    void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+
+    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
+
+    void* conv1_tanh = tensorTanh(conv1out);
+
+    // 2nd Layer
+    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
+
+    void* conv2_tanh = tensorTanh(conv2out);
+
+    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
+
+    // 3rd Layer
+    void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+    tensorAdd(conv3out, conv3_bias); // NOTE: In place operation
+
+    void* conv3_tanh = tensorTanh(conv3out);
+
+    void* pool3out = tensorPooling(conv3_tanh, 0, 2, 2, 0, 0, 2, 2);
+
+    // 4th Layer
+    void* conv4out = tensorConvolution(pool3out, conv4_filter, 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+    tensorAdd(conv4out, conv4_bias); // NOTE: In place operation
+
+    void* conv4_tanh = tensorTanh(conv4out);
+
+    void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
+
+    printTensorDims(pool4out);
+
+    // Fully connected layer 1
+    void* gemm1out = tensorGemmGPU(pool4out, fc1_weights);
+
+    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
+
+    void* tanh1out = tensorTanh(gemm1biasout);
+
+    // Fully connected layer 2
+    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);
+
+    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
+
+    void* tanh2out = tensorTanh(gemm2_biasout);
+
+    // Softmax over the final activations
+    void* result = tensorSoftmax(tanh2out);
+
+    printTensorDims(result);
+
+    // End profiling and dump output to profile.txt
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run to the tuner.
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Send a fixed 80-byte record, zero-padded after the message text.
+      char str[80] = {0};
+      strcpy(str, "completed***!\n");
+      write(fd_out, str, 80);
+      close(fd_out);
+    }
+  }
+}
+
+
+// Entry point. Passing any command-line argument switches the test into
+// OpenTuner mode (Opentuner_run); the argument's value itself is not read.
+int main(int argc, char* argv[]){
+
+  if(argc > 1){
+    Opentuner_run = true;
+  }
+
+  llvm_hpvm_initTensorRt(0);
+  testCifarNet();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
new file mode 100644
index 0000000000000000000000000000000000000000..de19b94b55e878be4f5edf133416b079fab4b8bd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
@@ -0,0 +1,132 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+bool Opentuner_run = false; // set to true by main() when any command-line argument is passed
+
+// Runs inference for a 2-layer fully connected network that applies
+// tensorRelu2(x, 0, 2) after every layer, and reports accuracy after each run.
+// When Opentuner_run is set, every run is bracketed by a handshake on
+// /tmp/myfifo: a command is read before the run ("stop_run" aborts the
+// process) and a completion record is written after it.
+void test_2_Layer_clipped_FC(){
+
+  int total_runs = 10;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
+
+  int test_batch_size = 5000;
+
+  // Load the labels, the input batch and the trained parameters.
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 128);
+  void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Read the next command from the OpenTuner pipe before starting the run.
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+
+      // Zero-fill the buffer so the command is NUL-terminated for strcmp()
+      // even when read() delivers fewer than 80 bytes or fails.
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+      close(fd);
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorGemmGPU(input, fc1_weights);
+
+    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc2_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Send a fixed 80-byte record, zero-padded after the message text.
+      char str[80] = {0};
+      strcpy(str, "completed***!\n");
+      write(fd_out, str, 80);
+      close(fd_out);
+    }
+  }
+}
+
+
+// Entry point. Passing any command-line argument switches the test into
+// OpenTuner mode (Opentuner_run), in which each run waits on a pipe.
+int main(int argc, char* argv[]){
+
+  if(argc > 1){
+    Opentuner_run = true;
+  }
+
+  // The runtime has to be initialized before any other tensor call.
+  llvm_hpvm_initTensorRt(0);
+  test_2_Layer_clipped_FC();
+  llvm_hpvm_cleanupTensorRt();
+
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a66c4b3ec3ab8380709537d57511a4643adc757a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
@@ -0,0 +1,151 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+
+
+bool Opentuner_run = false; // set to true by main() when any command-line argument is passed
+
+
+// Runs inference for a 3-layer fully connected network that applies
+// tensorRelu2(x, 0, 2) after every layer, and reports accuracy after each run.
+// When Opentuner_run is set, every run is bracketed by a handshake on
+// /tmp/myfifo: a command is read before the run ("stop_run" aborts the
+// process) and a completion record is written after it. Without it the
+// loop simply repeats 10000 times.
+void test_3_Layer_clipped_FC(){
+
+  int total_runs = 10000;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* 3-Layer FC with clipped activations and weights ********* \n");
+
+  int test_batch_size = 5000;
+
+  // Load the labels, the input batch and the trained parameters.
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 256);
+  void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin",
+                                      float_type, 1, 256, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin",
+                                         float_type, 1, 1, 256, 128);
+  void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Read the next command from the OpenTuner pipe before starting the run.
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+
+      // Zero-fill the buffer so the command is NUL-terminated for strcmp()
+      // even when read() delivers fewer than 80 bytes or fails.
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+      close(fd);
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorGemmGPU(input, fc1_weights);
+
+    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
+
+    // Layer-3
+    void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);
+
+    void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
+
+    void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2);
+
+    // Softmax over the last layer's output
+    void* result = tensorSoftmax(fc3_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run to the tuner.
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Send a fixed 80-byte record, zero-padded after the message text.
+      char str[80] = {0};
+      strcpy(str, "completed***!\n");
+      write(fd_out, str, 80);
+      close(fd_out);
+    }
+  }
+}
+
+
+
+// Entry point. Passing any command-line argument switches the test into
+// OpenTuner mode (Opentuner_run); the argument's value itself is not read.
+int main(int argc, char* argv[]){
+
+  if(argc > 1){
+    Opentuner_run = true;
+  }
+
+  // The runtime has to be initialized before any other tensor call.
+  llvm_hpvm_initTensorRt(0);
+  test_3_Layer_clipped_FC();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ec409aaa42b6bd11981705764bc6af25c0fb9d01
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
@@ -0,0 +1,156 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+bool Opentuner_run = false; // set to true by main() when any command-line argument is passed
+
+
+// Runs inference for a 4-layer fully connected network that applies
+// tensorRelu2(x, 0, 2) after every layer, and reports accuracy after each run.
+// When Opentuner_run is set, every run is bracketed by a handshake on
+// /tmp/myfifo: a command is read before the run ("stop_run" aborts the
+// process) and a completion record is written after it.
+void test_4_Layer_clipped_FC(){
+
+  int total_runs = 200;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* 4-Layer FC with clipped activations and weights ********* \n");
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 512);
+  void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin",
+                                      float_type, 1, 512, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin",
+                                         float_type, 1, 1, 512, 256);
+  void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin",
+                                      float_type, 1, 256, 1, 1);
+  void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin",
+                                         float_type, 1, 1, 256, 128);
+  void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Read the next command from the OpenTuner pipe before starting the run.
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+
+      // Zero-fill the buffer so the command is NUL-terminated for strcmp()
+      // even when read() delivers fewer than 80 bytes or fails.
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+      close(fd);
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorGemmGPU(input, fc1_weights);
+
+    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
+
+    // Layer-3
+    void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);
+
+    void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
+
+    void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2);
+
+    // Layer-4
+    void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights);
+
+    void* fc4_bias_out = tensorAdd(fc4out, fc4_bias);
+
+    void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc4_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Send a fixed 80-byte record, zero-padded after the message text.
+      char str[80] = {0};
+      strcpy(str, "completed***!\n");
+      write(fd_out, str, 80);
+      close(fd_out);
+    }
+  }
+}
+
+
+
+// Entry point. Passing any command-line argument switches the test into
+// OpenTuner mode (Opentuner_run); the argument's value itself is not read.
+int main(int argc, char* argv[]){
+
+  if(argc > 1){
+    Opentuner_run = true;
+  }
+
+  // The runtime has to be initialized before any other tensor call.
+  llvm_hpvm_initTensorRt(0);
+  test_4_Layer_clipped_FC();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44c03aab875a6de4af6c87776241295cd1fd673b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc2_half.cc
@@ -0,0 +1,137 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+#include "../../include/types.h"
+
+
+bool Opentuner_run = false;
+
+
+void test_2_Layer_clipped_FC(){
+  // Loads test inputs, labels and FC weights, then scores the 2-layer network once per run.
+  int total_runs = 1;
+  // OpenTuner mode: every iteration below is gated by a command read from a FIFO
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
+  // Number of input images and labels loaded below and scored in every run
+
+  int test_batch_size = 5000;
+
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 128);
+  void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorHalfGemm(input, fc1_weights);
+
+    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc2_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+}
+
+
+// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  // The runtime has to be initialized before any other runtime call
+  llvm_hpvm_initTensorRt(0);
+
+  test_2_Layer_clipped_FC();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..697fea9b8aa61a8c3cf5ec3e8d0d66466df9b1e8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc3_half.cc
@@ -0,0 +1,151 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+#include "../../include/types.h"
+
+
+
+
+bool Opentuner_run = false;
+
+
+void test_3_Layer_clipped_FC(){
+  // Loads test inputs, labels and FC weights, then scores the 3-layer network once per run.
+
+  int total_runs = 1000;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* 3-Layer FC with clipped activations and weights ********* \n");
+  // Number of input images and labels loaded below and scored in every run
+  int test_batch_size = 5000;
+
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 256);
+  void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin",
+                                      float_type, 1, 256, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin",
+                                         float_type, 1, 1, 256, 128);
+  void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+
+    // Layer-1
+    void* fc1out = tensorHalfGemm(input, fc1_weights);
+
+    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
+
+    // Layer-3
+    void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights);
+
+    void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias);
+
+    void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc3_relu);
+
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+
+}
+
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  // The runtime has to be initialized before any other runtime call
+  llvm_hpvm_initTensorRt(0);
+
+  test_3_Layer_clipped_FC();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ad999165cfd4148479de58e24fed8291161da491
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/fc4_half.cc
@@ -0,0 +1,156 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+#include "../../include/types.h"
+
+
+bool Opentuner_run = false;
+
+
+void test_4_Layer_clipped_FC(){
+  // Loads test inputs, labels and FC weights, then scores the 4-layer network once per run.
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* 4-Layer FC with clipped activations and weights ********* \n");
+  // Number of input images and labels loaded below and scored in every run
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                   float_type, test_batch_size, 1, 28, 28);
+  void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin",
+                                         float_type, 1, 1, 784, 512);
+  void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin",
+                                      float_type, 1, 512, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin",
+                                         float_type, 1, 1, 512, 256);
+  void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin",
+                                      float_type, 1, 256, 1, 1);
+  void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin",
+                                         float_type, 1, 1, 256, 128);
+  void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin",
+                                         float_type, 1, 1, 128, 10);
+  void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start execution profiling Tensor ops
+    startProfiling();
+
+    // Layer-1
+    void* fc1out = tensorHalfGemm(input, fc1_weights);
+
+    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
+
+    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
+
+    // Layer-2
+    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);
+
+    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
+
+    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
+
+    // Layer-3
+    void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights);
+
+    void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias);
+
+    void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2);
+
+    // Layer-4
+    void* fc4out = tensorHalfGemm(fc3_relu, fc4_weights);
+
+    void* fc4_bias_out = tensorHalfAdd(fc4out, fc4_bias);
+
+    void* fc4_relu = tensorHalfRelu2(fc4_bias_out, 0, 2);
+
+    void* result = tensorSoftmax(fc4_relu);
+
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+  }
+
+
+}
+
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  // The runtime has to be initialized before any other runtime call
+  llvm_hpvm_initTensorRt(0);
+
+  test_4_Layer_clipped_FC();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dd68f2b48eb66456061bb93decc1cbd985887be0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_keras_half.cc
@@ -0,0 +1,171 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+
+
+bool Opentuner_run = false;
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenetTanh(){
+  // Loads test images, labels and Lenet weights, then scores the network once per run.
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* Lenet-2 Architecture ********** \n");
+  // Number of input images and labels loaded below and scored in every run
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                CUDNN_DATA_FLOAT,
+                                test_batch_size, 1, 28, 28);
+
+  // NOTE: Filter descriptors do NOT have batch size
+  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
+  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
+                                          float_type, 32, 1, 5, 5);
+  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
+                                        float_type, 1, 32, 1, 1);
+  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
+                                          float_type, 64, 32, 5, 5);
+  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
+                                        float_type, 1, 64, 1, 1);
+  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
+                                         float_type, 1, 1, 7*7*64, 1024);
+  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
+                                         float_type, 1, 1, 1024, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start power and performance profiling
+    startProfiling();
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // NOTE: 'SAME' convolution
+    void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1,
+                                           conv_mode, conv_precision);
+
+    // NOTE: For tensorAdd, the only dimension that MUST match is channels
+    tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation
+
+    void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv1_tanh = tensorHalfTanh(pool1out);
+
+    // NOTE: input channels have to match between tensor op inputs and outputs
+    void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
+                                           conv_mode, conv_precision);
+    tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation
+
+    void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv2_tanh = tensorHalfTanh(pool2out);
+
+    void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights);
+
+    void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias);
+
+    void* tanh1out = tensorHalfTanh(gemm1biasout);
+
+    void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights);
+
+    void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias);
+
+    void* tanh2out = tensorHalfTanh(gemm2_biasout);
+
+    void* result = tensorSoftmax(tanh2out);
+
+    // End profiling and dump output to profile.txt
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+
+
+}
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  llvm_hpvm_initTensorRt(0);
+
+  testLenetTanh();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bb45b14d62e061e704b252aa44e602e0c1d08ba7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_tanh_half.cc
@@ -0,0 +1,173 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+#include "../../include/types.h"
+
+
+
+bool Opentuner_run = false;
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenetTanh(){
+  // Loads test images, labels and Lenet weights, then scores the network once per run.
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* Lenet-2 Architecture ********** \n");
+  // Number of input images and labels loaded below and scored in every run
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                CUDNN_DATA_FLOAT,
+                                test_batch_size, 1, 28, 28);
+
+  // NOTE: Filter descriptors do NOT have batch size
+  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
+  void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin",
+                                          float_type, 32, 1, 5, 5);
+  void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin",
+                                        float_type, 1, 32, 1, 1);
+  void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin",
+                                          float_type, 64, 32, 5, 5);
+  void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin",
+                                        float_type, 1, 64, 1, 1);
+  void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin",
+                                         float_type, 1, 1, 7*7*64, 1024);
+  void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin",
+                                         float_type, 1, 1, 1024, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start power and performance profiling
+    startProfiling();
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // NOTE: 'SAME' convolution
+    void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1,
+                                           conv_mode, conv_precision);
+
+    // NOTE: For tensorAdd, the only dimension that MUST match is channels
+    tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation
+
+    void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv1_tanh = tensorHalfTanh(pool1out);
+
+    // NOTE: input channels have to match between tensor op inputs and outputs
+    void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
+                                           conv_mode, conv_precision);
+    tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation
+
+    void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv2_tanh = tensorHalfTanh(pool2out);
+
+    void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights);
+
+    void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias);
+
+    void* tanh1out = tensorHalfTanh(gemm1biasout);
+
+    void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights);
+
+    void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias);
+
+    void* tanh2out = tensorHalfTanh(gemm2_biasout);
+
+    void* result = tensorSoftmax(tanh2out);
+
+    // End profiling and dump output to profile.txt
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+
+
+}
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  llvm_hpvm_initTensorRt(0);
+
+  testLenetTanh();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d2d663552fdab6366f28655ca835ba63cb4fcee4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet2_tanh.cc
@@ -0,0 +1,171 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+bool Opentuner_run = false;
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenetTanh(){
+  // Loads test images, labels and Lenet weights, then scores the network once per run.
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* Lenet-2 Architecture ********** \n");
+  // Number of input images and labels loaded below and scored in every run
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                CUDNN_DATA_FLOAT,
+                                test_batch_size, 1, 28, 28);
+
+  // NOTE: Filter descriptors do NOT have batch size
+  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
+  void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin",
+                                          float_type, 32, 1, 5, 5);
+  void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin",
+                                        float_type, 1, 32, 1, 1);
+  void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin",
+                                          float_type, 64, 32, 5, 5);
+  void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin",
+                                        float_type, 1, 64, 1, 1);
+  void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin",
+                                         float_type, 1, 1, 7*7*64, 1024);
+  void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin",
+                                         float_type, 1, 1, 1024, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start power and performance profiling
+    startProfiling();
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // NOTE: 'SAME' convolution
+    void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+
+    // NOTE: For tensorAdd, the only dimension that MUST match is channels
+    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
+
+    void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv1_tanh = tensorTanh(pool1out);
+
+    // NOTE: input channels have to match between tensor op inputs and outputs
+    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
+
+    void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv2_tanh = tensorTanh(pool2out);
+
+    void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights);
+
+    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
+
+    void* tanh1out = tensorTanh(gemm1biasout);
+
+    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);
+
+    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
+
+    void* tanh2out = tensorTanh(gemm2_biasout);
+
+    void* result = tensorSoftmax(tanh2out);
+
+    // End profiling and dump output to profile.txt
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+
+
+}
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  llvm_hpvm_initTensorRt(0);
+
+  testLenetTanh();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b6854f4563e34b375991b131b04c77e6ba83f577
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
@@ -0,0 +1,171 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+bool Opentuner_run = false;
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenetTanh(){
+  // Loads test images, labels and Lenet weights, then scores the network once per run.
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+
+  printf("********* Lenet-2 Architecture ********** \n");
+  // Number of input images and labels loaded below and scored in every run
+
+  int test_batch_size = 5000;
+
+  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
+
+  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                CUDNN_DATA_FLOAT,
+                                test_batch_size, 1, 28, 28);
+
+  // NOTE: Filter descriptors do NOT have batch size
+  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
+  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
+                                          float_type, 32, 1, 5, 5);
+  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
+                                        float_type, 1, 32, 1, 1);
+  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
+                                          float_type, 64, 32, 5, 5);
+  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
+                                        float_type, 1, 64, 1, 1);
+  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
+                                         float_type, 1, 1, 7*7*64, 1024);
+  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
+                                         float_type, 1, 1, 1024, 10);
+  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+
+    if(Opentuner_run){
+      // Wait on the FIFO for the command that starts (or stops) this run
+      const char* myfifo = "/tmp/myfifo";
+      int fd = open(myfifo, O_RDONLY);
+      // A failed open() yields -1, which fcntl() rejects as well
+      int ret_val = fcntl(fd, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Zero-filled so that the bytes read are always NUL-terminated for strcmp()
+      char str[100] = {0};
+      read(fd, str, 80);
+      if(strcmp(str, "stop_run") == 0){
+        abort();
+      }
+
+      close(fd);
+    }
+
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // Start power and performance profiling
+    startProfiling();
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // NOTE: 'SAME' convolution
+    void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+
+    // NOTE: For tensorAdd, the only dimension that MUST match is channels
+    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
+
+    void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv1_tanh = tensorTanh(pool1out);
+
+    // NOTE: input channels have to match between tensor op inputs and outputs
+    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
+
+    void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
+
+    void* conv2_tanh = tensorTanh(pool2out);
+
+    void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights);
+
+    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
+
+    void* tanh1out = tensorTanh(gemm1biasout);
+
+    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);
+
+    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
+
+    void* tanh2out = tensorTanh(gemm2_biasout);
+
+    void* result = tensorSoftmax(tanh2out);
+
+    // End profiling and dump output to profile.txt
+    stopProfiling();
+
+    computeAccuracy2(labels, test_batch_size, result);
+
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      // Report completion of this run back through the FIFO
+      const char* myfifo = "/tmp/myfifo";
+      int fd_out = open(myfifo, O_WRONLY);
+      int ret_val = fcntl(fd_out, F_GETFD);
+      if(ret_val == -1){
+        printf("Invalid descriptor \n");
+        abort();
+      }
+      // Fixed 80-byte, zero-padded message: write() never reads past the buffer
+      char str[80] = "completed***!\n";
+      write(fd_out, str, sizeof(str));
+      close(fd_out);
+    }
+
+  }
+
+
+
+}
+
+
+int main(int argc, char* argv[]){
+  // Any command-line argument switches the run into OpenTuner mode
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){ Opentuner_run = true; }
+
+  llvm_hpvm_initTensorRt(0);
+
+  testLenetTanh();
+
+  // Runtime cleanup once the benchmark has returned
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d7addd7283e24bedfc32d57d84c4ce17d9966f57
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/fc2_clipped_promise.cc
@@ -0,0 +1,80 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+#include "../../include/types.h"
+
+
+
+// Two-layer fully-connected MNIST network: each layer is GEMM, bias add and
+// tensorRelu2(x, 0, 2); the softmax output is scored against the t10k labels.
+void test_2_Layer_clipped_FC(){
+  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
+
+  int test_batch_size = 10000;
+
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, test_batch_size, 1, 28, 28);
+  void* layer1_w = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
+                                      float_type, 1, 1, 784, 128);
+  void* layer1_b = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
+                                      float_type, 1, 128, 1, 1);
+  void* layer2_w = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
+                                      float_type, 1, 1, 128, 10);
+  void* layer2_b = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  // Profile only the tensor operations between start and stop.
+  startProfiling();
+
+  // Layer 1: every intermediate tensor is printed and dumped under tensors_out2/.
+  void* l1_gemm = tensorGemmGPU(images, layer1_w);
+  printTensorDims(l1_gemm);
+  dumpWeightsToFile("tensors_out2/fc1out.out", l1_gemm);
+
+  void* l1_biased = tensorAdd(l1_gemm, layer1_b);
+  printTensorDims(l1_biased);
+  dumpWeightsToFile("tensors_out2/fc1_bias.out", l1_biased);
+
+  void* l1_clipped = tensorRelu2(l1_biased, 0, 2);
+  printTensorDims(l1_clipped);
+  dumpWeightsToFile("tensors_out2/fc1_clipped_relu.out", l1_clipped);
+
+  // Layer 2: only the GEMM output is dumped.
+  void* l2_gemm = tensorGemmGPU(l1_clipped, layer2_w);
+  printTensorDims(l2_gemm);
+  dumpWeightsToFile("tensors_out2/fc2out.out", l2_gemm);
+
+  void* l2_biased = tensorAdd(l2_gemm, layer2_b);
+  printTensorDims(l2_biased);
+
+  void* l2_clipped = tensorRelu2(l2_biased, 0, 2);
+  printTensorDims(l2_clipped);
+
+  void* result = tensorSoftmax(l2_clipped);
+  printTensorDims(result);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  test_batch_size, result);
+}
+
+
+
+// Entry point: initialize the tensor runtime, run the clipped FC test, clean up.
+int main(){
+  // The runtime has to be initialized before any other runtime call.
+  llvm_hpvm_initTensorRt(0);
+
+  test_2_Layer_clipped_FC();
+
+  // Shut the runtime down once the test has finished.
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d5211be3918adcd030fc40c13cba1ff0d7c53c18
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2.cc
@@ -0,0 +1,112 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+
+/* Reference LeNet-2 network used for profiling: two stages of convolution,
+   in-place bias add, ReLU and pooling, followed by two fully-connected layers
+   and a softmax. */
+void testLenet2Arch(){
+
+  printf("********* Lenet-2 Architecture ********** \n");
+
+  // The input tensor is requested with dims (test_batch_size, 1, 28, 28).
+  int test_batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 test_batch_size, 1, 28, 28);
+
+  // Filter dims have no batch size: the first two are output channels
+  // (configurable) and input channels (MUST match the convolved tensor).
+  // IMP: the output-channel counts follow the trained model that is loaded here.
+  void* conv1_w = readTrainedWeights("../model_params/lenet2_params/conv1.bin",
+                                     float_type, 32, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin",
+                                     float_type, 1, 32, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet2_params/conv2.bin",
+                                     float_type, 64, 32, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin",
+                                     float_type, 1, 64, 1, 1);
+  void* dense1_w = readTrainedWeights("../model_params/lenet2_params/fc1.bin",
+                                      float_type, 1, 1, 7*7*64, 1024);
+  void* dense1_b = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* dense2_w = readTrainedWeights("../model_params/lenet2_params/fc2.bin",
+                                      float_type, 1, 1, 1024, 10);
+  void* dense2_b = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  // Start power and performance profiling
+  startProfiling();
+
+  int conv_mode = 1;      // NOTE: using CROSS_CORRELATION
+  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+  // Stage 1: 'SAME' convolution, bias add, ReLU, pooling.
+  void* stage1 = tensorConvolution(images, conv1_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+
+  // NOTE: For tensorAdd, the only dimension that MUST match is channels.
+  tensorAdd(stage1, conv1_b); // NOTE: In place operation
+  printTensorDims(stage1);
+
+  void* stage1_relu = tensorRelu(stage1);
+
+  void* stage1_pool = tensorPooling(stage1_relu, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage1_pool);
+
+  // Stage 2: same sequence as stage 1.
+  // NOTE: input channels have to match between tensor op inputs and outputs
+  void* stage2 = tensorConvolution(stage1_pool, conv2_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+  tensorAdd(stage2, conv2_b); // NOTE: In place operation
+  printTensorDims(stage2);
+
+  void* stage2_relu = tensorRelu(stage2);
+
+  void* stage2_pool = tensorPooling(stage2_relu, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage2_pool);
+
+  // Fully-connected layer 1: GEMM, bias add, ReLU.
+  void* dense1 = tensorGemmGPU(stage2_pool, dense1_w);
+  printTensorDims(dense1);
+
+  void* dense1_biased = tensorAdd(dense1, dense1_b);
+  printTensorDims(dense1_biased);
+
+  void* dense1_relu = tensorRelu(dense1_biased);
+  printTensorDims(dense1_relu);
+
+  // Fully-connected layer 2: GEMM and bias add, then softmax.
+  void* dense2 = tensorGemmGPU(dense1_relu, dense2_w);
+  printTensorDims(dense2);
+
+  void* dense2_biased = tensorAdd(dense2, dense2_b);
+  printTensorDims(dense2_biased);
+
+  void* result = tensorSoftmax(dense2_biased);
+  printTensorDims(result);
+
+  // End profiling and dump output to profile.txt
+  stopProfiling();
+
+  // THINK: comparing the results probably does not need to be part of the HPVM graph
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  test_batch_size, result);
+}
+
+
+// Entry point: initialize the tensor runtime, run LeNet-2, then release the runtime.
+int main(){
+  llvm_hpvm_initTensorRt(0);
+  testLenet2Arch();
+  // Tear the runtime down before exiting, as the other test drivers do.
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc
new file mode 100644
index 0000000000000000000000000000000000000000..358cb6a75b8e63ca0a0bd964c9f73f2d16c39b4f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet2_promise.cc
@@ -0,0 +1,113 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+
+
+
+/* Reference LeNet-2 network used for profiling: two conv + ReLU + pooling
+   stages, two fully-connected layers and a softmax. This variant also dumps
+   the first-stage intermediate tensors under tensors_out/. */
+void testLenet2Arch(){
+
+  printf("********* Lenet-2 Architecture ********** \n");
+
+  int test_batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 test_batch_size, 1, 28, 28);
+
+  // Filter dims have no batch size: the first two are output channels
+  // (configurable) and input channels (MUST match the convolved tensor).
+  // IMP: the output-channel counts follow the trained model that is loaded here.
+  void* conv1_w = readTrainedWeights("../model_params/lenet2_params/conv1.bin",
+                                     float_type, 32, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin",
+                                     float_type, 1, 32, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet2_params/conv2.bin",
+                                     float_type, 64, 32, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin",
+                                     float_type, 1, 64, 1, 1);
+  void* dense1_w = readTrainedWeights("../model_params/lenet2_params/fc1.bin",
+                                      float_type, 1, 1, 7*7*64, 1024);
+  void* dense1_b = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* dense2_w = readTrainedWeights("../model_params/lenet2_params/fc2.bin",
+                                      float_type, 1, 1, 1024, 10);
+  void* dense2_b = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  // Start power and performance profiling
+  startProfiling();
+
+  int conv_mode = 1;      // NOTE: using CROSS_CORRELATION
+  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+  // Stage 1: 'SAME' convolution; its output is dumped before the bias add runs.
+  void* stage1 = tensorConvolution(images, conv1_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+  dumpWeightsToFile("tensors_out/conv1_out.out", stage1);
+
+  tensorAdd(stage1, conv1_b);  // NOTE: In-place operation
+  printTensorDims(stage1);
+  dumpWeightsToFile("tensors_out/conv1_bias_add.out", stage1);
+
+  void* stage1_relu = tensorRelu(stage1);
+  dumpWeightsToFile("tensors_out/conv1_relu.out", stage1_relu);
+
+  void* stage1_pool = tensorPooling(stage1_relu, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage1_pool);
+  dumpWeightsToFile("tensors_out/conv1_pool.out", stage1_pool);
+
+  // Stage 2: same sequence as stage 1, without the dumps.
+  // NOTE: input channels have to match between tensor op inputs and outputs
+  void* stage2 = tensorConvolution(stage1_pool, conv2_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+  tensorAdd(stage2, conv2_b); // NOTE: In place operation
+  printTensorDims(stage2);
+
+  void* stage2_relu = tensorRelu(stage2);
+
+  void* stage2_pool = tensorPooling(stage2_relu, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage2_pool);
+
+  // Fully-connected layer 1: GEMM, bias add, ReLU.
+  void* dense1 = tensorGemmGPU(stage2_pool, dense1_w);
+  printTensorDims(dense1);
+
+  void* dense1_biased = tensorAdd(dense1, dense1_b);
+  printTensorDims(dense1_biased);
+
+  void* dense1_relu = tensorRelu(dense1_biased);
+  printTensorDims(dense1_relu);
+
+  // Fully-connected layer 2: GEMM and bias add, then softmax.
+  void* dense2 = tensorGemmGPU(dense1_relu, dense2_w);
+  printTensorDims(dense2);
+
+  void* dense2_biased = tensorAdd(dense2, dense2_b);
+  printTensorDims(dense2_biased);
+
+  void* result = tensorSoftmax(dense2_biased);
+  printTensorDims(result);
+
+  // End profiling and dump output to profile.txt
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  test_batch_size, result);
+}
+
+
+// Entry point: initialize the tensor runtime, run LeNet-2, then release the runtime.
+int main(){
+  llvm_hpvm_initTensorRt(0);
+  testLenet2Arch();
+  // Tear the runtime down before exiting, as the other test drivers do.
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc
new file mode 100644
index 0000000000000000000000000000000000000000..42e364289e499d92591692a04e42988fd1a66dc5
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test/test_lenet_acc.cc
@@ -0,0 +1,109 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../../tensor_runtime/include/tensor_runtime.h"
+#include "../../include/utils.h"
+
+
+
+/* LeNet-2 style network used for accuracy checks on 1000 test images. In this
+   variant no ReLU follows either convolution: each pooling op consumes the
+   bias-added convolution output directly. */
+void testLenet2Arch(){
+
+  printf("********* Lenet-2 Architecture ********** \n");
+
+  int test_batch_size = 1000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 test_batch_size, 1, 28, 28);
+
+  // Filter dims have no batch size: the first two are output channels
+  // (configurable) and input channels (MUST match the convolved tensor).
+  // IMP: the output-channel counts follow the trained model that is loaded here.
+  void* conv1_w = readTrainedWeights("../model_params/lenet_test_params/conv1.bin",
+                                     float_type, 32, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet_test_params/conv1_bias.bin",
+                                     float_type, 1, 32, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet_test_params/conv2.bin",
+                                     float_type, 64, 32, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet_test_params/conv2_bias.bin",
+                                     float_type, 1, 64, 1, 1);
+  void* dense1_w = readTrainedWeights("../model_params/lenet_test_params/fc1.bin",
+                                      float_type, 1, 1, 7*7*64, 1024);
+  void* dense1_b = readTrainedWeights("../model_params/lenet_test_params/fc1_bias.bin",
+                                      float_type, 1, 1024, 1, 1);
+  void* dense2_w = readTrainedWeights("../model_params/lenet_test_params/fc2.bin",
+                                      float_type, 1, 1, 1024, 10);
+  void* dense2_b = readTrainedWeights("../model_params/lenet_test_params/fc2_bias.bin",
+                                      float_type, 1, 10, 1, 1);
+
+  // Start power and performance profiling
+  startProfiling();
+
+  int conv_mode = 1;      // NOTE: using CROSS_CORRELATION
+  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+  // Stage 1: 'SAME' convolution, bias add, pooling.
+  void* stage1 = tensorConvolution(images, conv1_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+
+  // NOTE: For tensorAdd, the only dimension that MUST match is channels.
+  tensorAdd(stage1, conv1_b); // NOTE: In place operation
+  printTensorDims(stage1);
+
+  void* stage1_pool = tensorPooling(stage1, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage1_pool);
+
+  // Stage 2: same sequence as stage 1.
+  // NOTE: input channels have to match between tensor op inputs and outputs
+  void* stage2 = tensorConvolution(stage1_pool, conv2_w, 2, 2, 1, 1,
+                                   conv_mode, conv_precision);
+  tensorAdd(stage2, conv2_b); // NOTE: In place operation
+  printTensorDims(stage2);
+
+  void* stage2_pool = tensorPooling(stage2, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(stage2_pool);
+
+  // Fully-connected layer 1: GEMM, bias add, ReLU.
+  void* dense1 = tensorGemmGPU(stage2_pool, dense1_w);
+  printTensorDims(dense1);
+
+  void* dense1_biased = tensorAdd(dense1, dense1_b);
+  printTensorDims(dense1_biased);
+
+  void* dense1_relu = tensorRelu(dense1_biased);
+  printTensorDims(dense1_relu);
+
+  // Fully-connected layer 2: GEMM and bias add, then softmax.
+  void* dense2 = tensorGemmGPU(dense1_relu, dense2_w);
+  printTensorDims(dense2);
+
+  void* dense2_biased = tensorAdd(dense2, dense2_b);
+  printTensorDims(dense2_biased);
+
+  void* result = tensorSoftmax(dense2_biased);
+  printTensorDims(result);
+
+  // End profiling and dump output to profile.txt
+  stopProfiling();
+
+  // THINK: comparing the results probably does not need to be part of the HPVM graph
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  test_batch_size, result);
+}
+
+
+// Entry point: initialize the tensor runtime, run the network once, clean up.
+int main(){
+  // The runtime is brought up before any tensor operation is issued.
+  llvm_hpvm_initTensorRt(0);
+
+  testLenet2Arch();
+
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a8129a1e459a15e26f595972724451e01d81b0a1
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_alexnet.cc
@@ -0,0 +1,480 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+// FIXIT: Data allocations may need to be organized - AlexNet may consume more than the available memory
+
+/*void testAlexnet2(){
+
+  struct Tensor* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 100, 3, 224, 224);
+  initTensorValues(input);
+
+  struct Tensor* conv1filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 96, 3, 11, 11);
+  printTensorDims(conv1filter);
+
+  /****** Start of Layer 1 ***************
+  
+  // NOTE: Padding for same conv is computed as P = (F - 1 /2)
+  struct Tensor* conv1out = tensorConvolution(input, conv1filter, 5, 5, 4, 4);
+  printTensorDims(conv1out);
+
+  struct Tensor* conv1bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 96, 1, 1);
+  struct Tensor* conv1bias_out = tensorAdd(conv1out, conv1bias); 
+  
+  struct Tensor* relu1out = tensorRelu(conv1bias_out);
+  
+  // NOTE: These parameters are a deviation from the original paper
+  // The parameters match the alexnet TF model
+  // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal
+  unsigned int LRN_window = 5;
+  double LRN_alpha = 2e-05;
+  double LRN_beta = 0.75;
+  double LRN_k = 1.0; 
+  struct Tensor* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
+  printTensorDims(lrn1out);
+
+  struct Tensor* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2);
+  
+  /****** End of Conv Layer 1 **********
+
+  struct Tensor** splits = tensorSplit(maxpool1out, 2, 1);
+ 
+  struct Tensor* conv2W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 48, 5, 5);
+  struct Tensor** conv2fils = tensorSplit(conv2W, 2, 0);
+
+  struct Tensor* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1);
+  printTensorDims(conv2a_out);
+
+  struct Tensor* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1);
+  printTensorDims(conv2b_out);
+ 
+  struct Tensor* conv2_outs[2];
+  conv2_outs[0] = conv2a_out;
+  conv2_outs[1] = conv2b_out;
+
+  struct Tensor* conv2_concat_out = tensorConcat(conv2_outs, 2, 1);
+  printTensorDims(conv2_concat_out);
+
+  struct Tensor* conv2bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1);
+  struct Tensor* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); 
+  struct Tensor* relu2out = tensorRelu(conv2bias_out);  
+  struct Tensor* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
+  printTensorDims(lrn2out);
+    
+  struct Tensor* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2);
+  printTensorDims(maxpool2out);
+
+  /******** End of Conv Layer 2 ************
+
+  struct Tensor* conv3filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 256, 3, 3);
+  struct Tensor* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1);
+  
+  struct Tensor* conv3bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1);
+  struct Tensor* conv3bias_out = tensorAdd(conv3_out, conv3bias); 
+  struct Tensor* relu3out = tensorRelu(conv3bias_out);  
+  printTensorDims(relu3out);
+
+  /********* End of Conv layer 3 ******
+
+  struct Tensor** splits2 = tensorSplit(relu3out, 2, 1);
+
+  struct Tensor* conv4W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 192, 3, 3);
+  struct Tensor** conv4fils = tensorSplit(conv4W, 2, 0);
+
+  printTensorDims(splits2[0]);
+  printTensorDims(conv4fils[0]);
+  
+  struct Tensor* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1);
+  printTensorDims(conv4a_out);
+
+  struct Tensor* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1);
+  printTensorDims(conv4b_out);
+ 
+  struct Tensor* conv4_outs[2];
+  conv4_outs[0] = conv4a_out;
+  conv4_outs[1] = conv4b_out;
+
+  struct Tensor* conv4_concat_out = tensorConcat(conv4_outs, 2, 1);
+  printTensorDims(conv4_concat_out);
+
+  struct Tensor* conv4bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1);
+  struct Tensor* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); 
+  struct Tensor* relu4out = tensorRelu(conv4bias_out);  
+  printTensorDims(relu4out);
+  
+  /********* End of Conv layer 4 ******
+
+  struct Tensor** splits3 = tensorSplit(relu4out, 2, 1);
+
+  struct Tensor* conv5W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 192, 3, 3);
+  struct Tensor** conv5fils = tensorSplit(conv5W, 2, 0);
+
+  printTensorDims(splits3[0]);
+  printTensorDims(conv5fils[0]);
+  
+  struct Tensor* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1);
+  printTensorDims(conv5a_out);
+
+  struct Tensor* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1);
+  printTensorDims(conv5b_out);
+ 
+  struct Tensor* conv5_outs[2];
+  conv5_outs[0] = conv5a_out;
+  conv5_outs[1] = conv5b_out;
+
+  struct Tensor* conv5_concat_out = tensorConcat(conv5_outs, 2, 1);
+  printTensorDims(conv5_concat_out);
+
+  struct Tensor* conv5bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1);
+  struct Tensor* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); 
+  struct Tensor* relu5out = tensorRelu(conv5bias_out);  
+  printTensorDims(relu5out);
+
+  struct Tensor* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2);
+  printTensorDims(maxpool5out);
+
+  /********* End of Conv layer 5 ******
+
+  struct Tensor* fc1_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+					      1, 1, 256*6*6, 4096);
+  struct Tensor* gemm1out = tensorGemm(maxpool5out, fc1_weights);  
+  printTensorDims(gemm1out);
+
+  struct Tensor* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+				       1, 1, 1, 4096);
+  struct Tensor* gemm1biasout = tensorGemmBias(gemm1out, bias);
+  printTensorDims(gemm1biasout);
+
+  struct Tensor* relu6out = tensorRelu(gemm1biasout);  
+  printTensorDims(relu6out);
+
+  /***** End of FC1 layer ********
+
+  struct Tensor* fc2_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+					      1, 1, 4096, 4096);
+  struct Tensor* gemm2out = tensorGemm(relu6out, fc2_weights);  
+  printTensorDims(gemm2out);
+
+  struct Tensor* bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+				       1, 1, 1, 4096);
+  struct Tensor* gemm2biasout = tensorGemmBias(gemm2out, bias2);
+  printTensorDims(gemm2biasout);
+
+  struct Tensor* relu7out = tensorRelu(gemm2biasout);  
+  printTensorDims(relu7out);
+
+  /***** End of FC2 layer ********
+
+  struct Tensor* fc3_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+					      1, 1, 4096, 1000);
+  struct Tensor* gemm3out = tensorGemm(relu7out, fc3_weights);  
+  printTensorDims(gemm3out);
+
+  struct Tensor* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+				       1, 1, 1, 1000);
+  struct Tensor* gemm3biasout = tensorGemmBias(gemm3out, bias3);
+  printTensorDims(gemm3biasout);
+
+  /******** End of FC3 Layer **********
+  struct Tensor* result = tensorSoftmax(gemm3biasout);
+  printTensorDims(result);
+  
+} */
+
+
+
+// Prints, for every row of the result tensor, the index of the largest value
+// ("chosen"), that value, and the value three classes further along the same row.
+// labels_file and num_labels are currently unused; result->host_data is read as floats.
+void printLikelihood(char* labels_file, int num_labels, void* result_ptr){
+  struct Tensor* result = (struct Tensor*) result_ptr;
+  size_t batch_dim = result->dims.dim_sizes[0];
+  size_t channels = result->dims.dim_sizes[1];
+  float* data = (float*) result->host_data;
+  if(channels == 0) return; // nothing to rank
+  for(size_t i = 0; i < batch_dim; i++){
+    // Index into this row's own values; indexing from data[0] reported row 0 for every row.
+    float* row = data + i * channels;
+    int chosen = 0;
+    for (size_t id = 1; id < channels; ++id){
+      if (row[chosen] < row[id]) chosen = (int) id;
+    }
+    // chosen+3 may run past the end of the row: print 0 rather than read out of bounds.
+    float shifted = ((size_t) chosen + 3 < channels) ? row[chosen + 3] : 0.0f;
+    printf("** chosen = %d, label = %f, label+3 = %f \n", chosen, row[chosen], shifted);
+  }
+}
+
+
+//--- Results not matching
+// *** CHECK:
+// 1) cudnnCrossCorrelation vs cudnnConvolution
+// 2) Weights
+// 3) Tensor outputs
+// 4) Data layouts
+
+
+
+/* Reference AlexNet architecture: five convolution layers (2, 4 and 5 run as
+   two split halves) followed by three fully-connected layers and a softmax. */
+// FIXIT: Data allocations may need to be organized - AlexNet may consume more than the available memory
+void testAlexnet3(){
+  int test_batch_size = 2;
+  int conv_mode = 1;      // CROSS_CORRELATION matches the TF conv2d implementation
+  int conv_precision = 0; // floating point precision for convolution
+
+  printf("****** AlexNet Architecture 3 ******** \n\n");
+  void* input = readTrainedWeights("../alexnet/params/combined_imgs.bin",
+                                   CUDNN_DATA_FLOAT, test_batch_size, 3, 227, 227);
+  dumpWeightsToFile("tensors_out/input.out", input);
+
+  /****** Conv layer 1: convolution, bias, ReLU, LRN, pooling ******/
+  void* conv1filter = readTrainedWeights("../alexnet/params/conv1.bin",
+                                         CUDNN_DATA_FLOAT, 96, 3, 11, 11);
+  printTensorDims(conv1filter);
+  dumpWeightsToFile("tensors_out/conv1filter.out", conv1filter);
+
+  // NOTE(review): the original note said the trained model has NO padding in this conv,
+  // yet 4, 4 is passed where the 'SAME' convs below pass (F - 1) / 2 - confirm which is right.
+  void* conv1out = tensorConvolution(input, conv1filter, 4, 4, 4, 4,
+                                     conv_mode, conv_precision);
+  printTensorDims(conv1out);
+
+  void* conv1bias = readTrainedWeights("../alexnet/params/conv1.bias.bin",
+                                       CUDNN_DATA_FLOAT, 1, 96, 1, 1);
+  void* conv1bias_out = tensorAdd(conv1out, conv1bias);
+  // conv1out is dumped after the tensorAdd call (NOTE(review): if tensorAdd is in place, bias is included).
+  dumpWeightsToFile("tensors_out/conv1_init.out", conv1out);
+
+  void* relu1out = tensorRelu(conv1bias_out);
+  printTensorDims(relu1out);
+  dumpWeightsToFile("tensors_out/conv1.out", relu1out);
+
+  // NOTE: These LRN parameters are a deviation from the original paper
+  // The parameters match the alexnet TF model
+  // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal
+  unsigned int LRN_window = 5;
+  double LRN_alpha = 2e-05 * LRN_window;
+  double LRN_beta = 0.75;
+  double LRN_k = 1.0;
+
+  // TEST-point - Compare TF vs CUDNN
+  void* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
+  printTensorDims(lrn1out);
+  dumpWeightsToFile("tensors_out/lrn1.out", lrn1out);
+
+  void* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2);
+  printTensorDims(maxpool1out);
+  dumpWeightsToFile("tensors_out/maxpool1.out", maxpool1out);
+
+  /****** End of Conv Layer 1 ***********/
+
+  /****** Conv layer 2: two half convolutions, concat, bias, ReLU, LRN, pooling ******/
+  // TEST-point: split in two, then compare the re-concatenated tensor with the original
+  void** splits = tensorSplit(maxpool1out, 2, 1);
+
+  void* concat_test1 = tensorConcat(splits, 2, 1);
+  compareTensors(maxpool1out, concat_test1);
+
+  // Layer-2 filters, loaded as one tensor and split in two below.
+  void* conv2W = readTrainedWeights("../alexnet/params/conv2.bin",
+                                    CUDNN_DATA_FLOAT, 256, 48, 5, 5);
+
+  dumpWeightsToFile("tensors_out/conv2filter.out", conv2W);
+
+  // TEST point - compare split convolution across TF vs cuDNN
+  void** conv2fils = tensorSplit(conv2W, 2, 0);
+
+  void* concat_test2 = tensorConcat(conv2fils, 2, 0);
+  compareTensors(conv2W, concat_test2);
+
+  // NOTE: Padding for same conv is computed as P = ((F - 1) / 2)
+  void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv2a_out);
+
+  void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv2b_out);
+
+  // Test point: re-join the two half convolutions.
+  void* conv2_outs[2] = {conv2a_out, conv2b_out};
+
+  void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1);
+  printTensorDims(conv2_concat_out);
+  dumpWeightsToFile("tensors_out/conv2_init.out", conv2_concat_out);
+
+  void* conv2bias = readTrainedWeights("../alexnet/params/conv2.bias.bin",
+                                       CUDNN_DATA_FLOAT, 1, 256, 1, 1);
+  void* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias);
+  printTensorDims(conv2bias_out);
+
+  dumpWeightsToFile("tensors_out/conv2_bias_init.out", conv2bias_out);
+
+  void* relu2out = tensorRelu(conv2bias_out);
+  dumpWeightsToFile("tensors_out/conv2.out", relu2out);
+  printTensorDims(relu2out);
+
+  void* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
+  printTensorDims(lrn2out);
+
+  void* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2);
+  printTensorDims(maxpool2out);
+
+  /******** End of Conv Layer 2 *************/
+
+  void* conv3filter = readTrainedWeights("../alexnet/params/conv3.bin",
+                                         CUDNN_DATA_FLOAT, 384, 256, 3, 3);
+  void* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1,
+                                      conv_mode, conv_precision);
+
+  void* conv3bias = readTrainedWeights("../alexnet/params/conv3.bias.bin",
+                                       CUDNN_DATA_FLOAT, 1, 384, 1, 1);
+  void* conv3bias_out = tensorAdd(conv3_out, conv3bias);
+  void* relu3out = tensorRelu(conv3bias_out);
+  dumpWeightsToFile("tensors_out/conv3.out", relu3out);
+  printTensorDims(relu3out);
+
+  /********* End of Conv layer 3 *******/
+
+  /********* Conv layer 4: two half convolutions, concat, bias, ReLU *******/
+  void** splits2 = tensorSplit(relu3out, 2, 1);
+
+  void* conv4W = readTrainedWeights("../alexnet/params/conv4.bin",
+                                    CUDNN_DATA_FLOAT, 384, 192, 3, 3);
+  void** conv4fils = tensorSplit(conv4W, 2, 0);
+
+  printTensorDims(splits2[0]);
+  printTensorDims(conv4fils[0]);
+
+  // Test-point: does the pairing of splits and filters make sense?
+  void* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv4a_out);
+
+  void* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv4b_out);
+
+  // Re-join the two half convolutions.
+  void* conv4_outs[2] = {conv4a_out, conv4b_out};
+
+  void* conv4_concat_out = tensorConcat(conv4_outs, 2, 1);
+  printTensorDims(conv4_concat_out);
+
+  void* conv4bias = readTrainedWeights("../alexnet/params/conv4.bias.bin",
+                                       CUDNN_DATA_FLOAT, 1, 384, 1, 1);
+  void* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias);
+
+  void* relu4out = tensorRelu(conv4bias_out);
+  printTensorDims(relu4out);
+
+  /********* End of Conv layer 4 *******/
+
+  /********* Conv layer 5: two half convolutions, concat, bias, ReLU, pooling *******/
+  void** splits3 = tensorSplit(relu4out, 2, 1);
+
+  void* conv5W = readTrainedWeights("../alexnet/params/conv5.bin",
+                                    CUDNN_DATA_FLOAT, 256, 192, 3, 3);
+  void** conv5fils = tensorSplit(conv5W, 2, 0);
+
+  printTensorDims(splits3[0]);
+  printTensorDims(conv5fils[0]);
+
+  void* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv5a_out);
+
+  void* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1,
+                                       conv_mode, conv_precision);
+  printTensorDims(conv5b_out);
+
+  // Re-join the two half convolutions.
+  void* conv5_outs[2] = {conv5a_out, conv5b_out};
+
+  void* conv5_concat_out = tensorConcat(conv5_outs, 2, 1);
+  printTensorDims(conv5_concat_out);
+
+  void* conv5bias = readTrainedWeights("../alexnet/params/conv5.bias.bin",
+                                       CUDNN_DATA_FLOAT, 1, 256, 1, 1);
+  void* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias);
+  void* relu5out = tensorRelu(conv5bias_out);
+  printTensorDims(relu5out);
+
+  void* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2);
+  printTensorDims(maxpool5out);
+
+  /********* End of Conv layer 5 *******/
+
+  // Test-point: I suspect the data may not be laid out correctly (either in file or after loading)
+  void* fc1_weights = readTrainedWeights("../alexnet/params/fc1.bin",
+                                         CUDNN_DATA_FLOAT, 1, 1, 256*6*6, 4096);
+  void* gemm1out = tensorGemm(maxpool5out, fc1_weights);
+  printTensorDims(gemm1out);
+
+  void* bias = readTrainedWeights("../alexnet/params/fc1.bias.bin",
+                                  CUDNN_DATA_FLOAT, 1, 1, 1, 4096);
+
+  void* gemm1biasout = tensorGemmBias(gemm1out, bias);
+  printTensorDims(gemm1biasout);
+
+  void* relu6out = tensorRelu(gemm1biasout);
+  printTensorDims(relu6out);
+
+  /***** End of FC1 layer *********/
+
+  void* fc2_weights = readTrainedWeights("../alexnet/params/fc2.bin",
+                                         CUDNN_DATA_FLOAT, 1, 1, 4096, 4096);
+  void* gemm2out = tensorGemm(relu6out, fc2_weights);
+  printTensorDims(gemm2out);
+
+  void* bias2 = readTrainedWeights("../alexnet/params/fc2.bias.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 1, 4096);
+  void* gemm2biasout = tensorGemmBias(gemm2out, bias2);
+  printTensorDims(gemm2biasout);
+
+  void* relu7out = tensorRelu(gemm2biasout);
+  printTensorDims(relu7out);
+
+  /***** End of FC2 layer *********/
+
+  void* fc3_weights = readTrainedWeights("../alexnet/params/fc3.bin",
+                                         CUDNN_DATA_FLOAT, 1, 1, 4096, 1000);
+  void* gemm3out = tensorGemm(relu7out, fc3_weights);
+  printTensorDims(gemm3out);
+
+  void* bias3 = readTrainedWeights("../alexnet/params/fc3.bias.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 1, 1000);
+  void* gemm3biasout = tensorGemmBias(gemm3out, bias3);
+  printTensorDims(gemm3biasout);
+
+  /******** End of FC3 Layer ***********/
+  void* result = tensorSoftmax(gemm3biasout);
+  printTensorDims(result);
+
+  // FIXIT: Pass file with the labels
+  printLikelihood("", test_batch_size, result);
+  // THINK: comparing the results probably does not need to be part of the HPVM graph
+  printf("END of Alexnet3 -- \n");
+}
+
+
+
+
+
+// Entry point: initializes the runtime and runs the AlexNet-3 reference test.
+int main(){
+  // IMP-NOTE: the runtime must always be initialized first.
+  // NOTE(review): the other test drivers call llvm_hpvm_initTensorRt(0) and
+  // llvm_hpvm_cleanupTensorRt(); confirm initializeRuntime is still the intended API.
+  initializeRuntime(0);
+
+  // Only the third AlexNet variant is run.
+  testAlexnet3();
+  return 0;
+}
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c0fee9b659db9ff45f56b75b989fbbed68523d43
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_half.cc
@@ -0,0 +1,74 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testFC_half() {
+  // Two-layer fully connected MNIST classifier; both matrix multiplies go through tensorHgemm.
+  printf("********* Fully Connected DNN-1 ********* \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, batch_size, 1, 28, 28);
+
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // The profiling window opens here and is closed by stopProfiling() below.
+  startProfiling();
+
+  // First layer: matmul -> bias add -> ReLU
+  void* l1_mm = tensorHgemm(images, w1);
+  printTensorDims(l1_mm);
+
+  void* l1_biased = tensorAdd(l1_mm, b1);
+  printTensorDims(l1_biased);
+
+  void* l1_act = tensorRelu(l1_biased);
+  printTensorDims(l1_act);
+
+  // Second layer: matmul -> bias add -> ReLU
+  void* l2_mm = tensorHgemm(l1_act, w2);
+  printTensorDims(l2_mm);
+
+  void* l2_biased = tensorAdd(l2_mm, b2);
+  printTensorDims(l2_biased);
+
+  void* l2_act = tensorRelu(l2_biased);
+  printTensorDims(l2_act);
+
+  void* probs = tensorSoftmax(l2_act);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  batch_size, probs);
+}
+
+
+
+int main(){
+  // Initialize the tensor runtime; this must be called before anything else.
+  llvm_hpvm_initTensorRt(0);
+  testFC_half();
+
+  // Release the runtime before exiting, as the other FC network drivers do.
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e8b70146a10359bf2df7420ae388325e6a658557
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network.cc
@@ -0,0 +1,152 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+
+void testFCNetworkArchCPU() {
+  // Two-layer fully connected MNIST classifier; both matrix multiplies go through tensorGemmCPU.
+  printf("********* Fully Connected DNN-1 ********* \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, batch_size, 1, 28, 28);
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  //dumpWeightsToFile("tensors_out/input_fc.out", images);
+  //dumpWeightsToFile("tensors_out/fc1_w_fc.out", w1);
+
+  printTensorDims(images);
+  printTensorDims(w1);
+
+  // The profiling window opens here and is closed by stopProfiling() below.
+  startProfiling();
+
+  // First layer: matmul -> bias add -> ReLU
+  void* l1_mm = tensorGemmCPU(images, w1);
+  printTensorDims(l1_mm);
+  //dumpWeightsToFile("tensors_out/fc1out_fc.out", l1_mm);
+
+  void* l1_biased = tensorAdd(l1_mm, b1);
+  //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", l1_biased);
+  printTensorDims(l1_biased);
+
+  void* l1_act = tensorRelu(l1_biased);
+  //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", l1_act);
+  printTensorDims(l1_act);
+
+  // Second layer: matmul -> bias add -> ReLU
+  void* l2_mm = tensorGemmCPU(l1_act, w2);
+  //dumpWeightsToFile("tensors_out/fc2out_fc.out", l2_mm);
+  printTensorDims(l2_mm);
+
+  void* l2_biased = tensorAdd(l2_mm, b2);
+  //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", l2_biased);
+  printTensorDims(l2_biased);
+
+  void* l2_act = tensorRelu(l2_biased);
+  //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", l2_act);
+  printTensorDims(l2_act);
+
+  void* probs = tensorSoftmax(l2_act);
+  printTensorDims(probs);
+
+  // The profiling window closes here, before accuracy is computed.
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+  // OPEN QUESTION: the accuracy comparison probably does not need to be part of the HPVM graph.
+}
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testFCNetworkArchGPU() {
+  // Two-layer fully connected MNIST classifier; both matrix multiplies go through tensorGemmGPU.
+  printf("********* Fully Connected DNN-1 ********* \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, batch_size, 1, 28, 28);
+
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // The profiling window opens here and is closed by stopProfiling() below.
+  startProfiling();
+
+  // First layer: matmul -> bias add -> ReLU
+  void* l1_mm = tensorGemmGPU(images, w1);
+  printTensorDims(l1_mm);
+
+  void* l1_biased = tensorAdd(l1_mm, b1);
+  //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", l1_biased);
+  printTensorDims(l1_biased);
+
+  void* l1_act = tensorRelu(l1_biased);
+  //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", l1_act);
+  printTensorDims(l1_act);
+
+  // Disabled hook for adding errors to the first layer's output:
+  //tensorAddError(l1_act, 3);
+
+  // Second layer: matmul -> bias add -> ReLU
+  void* l2_mm = tensorGemmGPU(l1_act, w2);
+  //dumpWeightsToFile("tensors_out/fc2out_fc.out", l2_mm);
+  printTensorDims(l2_mm);
+
+  void* l2_biased = tensorAdd(l2_mm, b2);
+  //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", l2_biased);
+  printTensorDims(l2_biased);
+
+  void* l2_act = tensorRelu(l2_biased);
+  //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", l2_act);
+  printTensorDims(l2_act);
+
+  void* probs = tensorSoftmax(l2_act);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+  // OPEN QUESTION: the accuracy comparison probably does not need to be part of the HPVM graph.
+}
+
+
+
+int main() {
+  // The runtime has to be initialized before anything else is called.
+  llvm_hpvm_initTensorRt(0);
+
+  // The CPU variant is kept for reference but is not run:
+  //   testFCNetworkArchCPU();
+
+  testFCNetworkArchGPU();
+
+  // Shut the runtime down once the test has finished.
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fc00532a1b3712fab9d098a9a8e1a1586f1458a5
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network2.cc
@@ -0,0 +1,94 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+void test4LayerFC() {
+  // Four fully connected layers (matmul + bias add each, no activation in between), then softmax.
+  printf("********* 4-layer FC Network ********* \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 float_type,
+                                 batch_size, 1, 28, 28);
+  void* w1 = readTrainedWeights("../model_params/FC_network1/fc1.bin",
+                                float_type, 1, 1, 784, 1000);
+  void* b1 = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin",
+                                float_type, 1, 1000, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network1/fc2.bin",
+                                float_type, 1, 1, 1000, 500);
+  void* b2 = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin",
+                                float_type, 1, 500, 1, 1);
+  void* w3 = readTrainedWeights("../model_params/FC_network1/fc3.bin",
+                                float_type, 1, 1, 500, 200);
+  void* b3 = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin",
+                                float_type, 1, 200, 1, 1);
+  void* w4 = readTrainedWeights("../model_params/FC_network1/fc4.bin",
+                                float_type, 1, 1, 200, 10);
+  void* b4 = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  //dumpWeightsToFile("tensors_out/input_fc.out", images);
+  //dumpWeightsToFile("tensors_out/fc1_w_fc.out", w1);
+
+  // The profiling window opens here and is closed by stopProfiling() below.
+  startProfiling();
+
+  // Layer 1
+  void* l1_mm = tensorGemmGPU(images, w1);
+  printTensorDims(l1_mm);
+  //dumpWeightsToFile("tensors_out/fc1out_fc.out", l1_mm);
+
+  void* l1_out = tensorAdd(l1_mm, b1);
+  printTensorDims(l1_out);
+  //dumpWeightsToFile("tensors_out/fc_fc1.out", l1_out);
+
+  // Layer 2
+  void* l2_mm = tensorGemmGPU(l1_out, w2);
+  printTensorDims(l2_mm);
+
+  void* l2_out = tensorAdd(l2_mm, b2);
+  printTensorDims(l2_out);
+
+  // Layer 3
+  void* l3_mm = tensorGemmGPU(l2_out, w3);
+  printTensorDims(l3_mm);
+
+  void* l3_out = tensorAdd(l3_mm, b3);
+  printTensorDims(l3_out);
+
+  // Layer 4
+  void* l4_mm = tensorGemmGPU(l3_out, w4);
+  printTensorDims(l4_mm);
+
+  void* l4_out = tensorAdd(l4_mm, b4);
+  printTensorDims(l4_out);
+
+  void* probs = tensorSoftmax(l4_out);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+}
+
+
+
+
+int main() {
+  // Set up the tensor runtime, run the test, then tear the runtime down.
+  llvm_hpvm_initTensorRt(0);
+
+  test4LayerFC();
+
+  llvm_hpvm_cleanupTensorRt();
+
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc
new file mode 100644
index 0000000000000000000000000000000000000000..531bb01695cddb70de0f9bea90f6b229679e9bce
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_fc_network3.cc
@@ -0,0 +1,93 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+
+
+void test4LayerFC() {
+  // Four fully connected layers, each matmul -> bias add -> ReLU, followed by softmax.
+  printf("********* 4-layer FC Network ********* \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 float_type,
+                                 batch_size, 1, 28, 28);
+  void* w1 = readTrainedWeights("../model_params/FC_network3/fc1.bin",
+                                float_type, 1, 1, 784, 512);
+  void* b1 = readTrainedWeights("../model_params/FC_network3/fc1_bias.bin",
+                                float_type, 1, 512, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network3/fc2.bin",
+                                float_type, 1, 1, 512, 256);
+  void* b2 = readTrainedWeights("../model_params/FC_network3/fc2_bias.bin",
+                                float_type, 1, 256, 1, 1);
+  void* w3 = readTrainedWeights("../model_params/FC_network3/fc3.bin",
+                                float_type, 1, 1, 256, 128);
+  void* b3 = readTrainedWeights("../model_params/FC_network3/fc3_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w4 = readTrainedWeights("../model_params/FC_network3/fc4.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b4 = readTrainedWeights("../model_params/FC_network3/fc4_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // The profiling window opens here and is closed by stopProfiling() below.
+  startProfiling();
+
+  // Layer 1
+  void* l1_mm = tensorGemmGPU(images, w1);
+  printTensorDims(l1_mm);
+
+  void* l1_biased = tensorAdd(l1_mm, b1);
+  printTensorDims(l1_biased);
+  void* l1_act = tensorRelu(l1_biased);
+
+  // Layer 2
+  void* l2_mm = tensorGemmGPU(l1_act, w2);
+  printTensorDims(l2_mm);
+
+  void* l2_biased = tensorAdd(l2_mm, b2);
+  printTensorDims(l2_biased);
+  void* l2_act = tensorRelu(l2_biased);
+
+  // Layer 3
+  void* l3_mm = tensorGemmGPU(l2_act, w3);
+  printTensorDims(l3_mm);
+
+  void* l3_biased = tensorAdd(l3_mm, b3);
+  printTensorDims(l3_biased);
+  void* l3_act = tensorRelu(l3_biased);
+
+  // Layer 4 (ReLU is applied here too, before the softmax)
+  void* l4_mm = tensorGemmGPU(l3_act, w4);
+  printTensorDims(l4_mm);
+
+  void* l4_biased = tensorAdd(l4_mm, b4);
+  printTensorDims(l4_biased);
+  void* l4_act = tensorRelu(l4_biased);
+
+  void* probs = tensorSoftmax(l4_act);
+  printTensorDims(probs);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+}
+
+
+
+
+int main() {
+  // Set up the tensor runtime, run the test, then tear the runtime down.
+  llvm_hpvm_initTensorRt(0);
+
+  test4LayerFC();
+
+  llvm_hpvm_cleanupTensorRt();
+
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e21b09fbf59c6ceee2adcf6df798ef04351a03ef
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet.cc
@@ -0,0 +1,178 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenetArch2() {
+  // LeNet-style network: two conv + bias + pool stages, two FC layers with bias, then softmax.
+  printf("********* Lenet Architecture ********** \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 batch_size, 1, 28, 28);
+
+  // Filter descriptors carry no batch dimension.
+  // Their first two dims are output channels (configurable) and input channels (must match the input).
+  // The output channel counts follow the trained model, not the LeNet variant from Andrew Ng's class.
+  void* conv1_w = readTrainedWeights("../model_params/lenet_params/conv1.bin",
+                                     CUDNN_DATA_FLOAT, 20, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin",
+                                     CUDNN_DATA_FLOAT, 1, 20, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet_params/conv2.bin",
+                                     CUDNN_DATA_FLOAT, 50, 20, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin",
+                                     CUDNN_DATA_FLOAT, 1, 50, 1, 1);
+  void* fc1_w = readTrainedWeights("../model_params/lenet_params/ip1.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 800, 500);
+  void* fc1_b = readTrainedWeights("../model_params/lenet_params/ip1.bias.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 1, 500);
+  void* fc2_w = readTrainedWeights("../model_params/lenet_params/ip2.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 500, 10);
+  void* fc2_b = readTrainedWeights("../model_params/lenet_params/ip2.bias.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 1, 10);
+
+
+  // Power and performance profiling starts here and is closed by stopProfiling() below.
+  startProfiling();
+
+  int mode = 1;       // CROSS_CORRELATION
+  int precision = 0;  // float compute precision. FIXIT: use an enum
+
+  void* conv1 = tensorConvolution(images, conv1_w, 0, 0, 1, 1,
+                                  mode, precision);
+  // For tensorAdd the only dimension that must match is channels.
+  tensorAdd(conv1, conv1_b);  // in-place: the sum lands in conv1
+  printTensorDims(conv1);
+
+  void* pool1 = tensorPooling(conv1, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool1);
+
+  // Input channels have to match between tensor op inputs and outputs.
+  void* conv2 = tensorConvolution(pool1, conv2_w, 0, 0, 1, 1,
+                                  mode, precision);
+  tensorAdd(conv2, conv2_b);  // in-place: the sum lands in conv2
+
+  printTensorDims(conv2);
+
+  void* pool2 = tensorPooling(conv2, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool2);
+
+  void* fc1_mm = tensorGemm(pool2, fc1_w);
+  printTensorDims(fc1_mm);
+
+  void* fc1_biased = tensorGemmBias(fc1_mm, fc1_b);
+  printTensorDims(fc1_biased);
+
+  void* fc1_act = tensorRelu(fc1_biased);
+  printTensorDims(fc1_act);
+
+  void* fc2_mm = tensorGemm(fc1_act, fc2_w);
+  printTensorDims(fc2_mm);
+
+  void* fc2_biased = tensorGemmBias(fc2_mm, fc2_b);
+  printTensorDims(fc2_biased);
+
+  void* probs = tensorSoftmax(fc2_biased);
+  printTensorDims(probs);
+
+  // Profiling ends here; the output is dumped to profile.txt.
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+  // OPEN QUESTION: the accuracy comparison probably does not need to be part of the HPVM graph.
+}
+
+
+/* This architecture REMOVES the bias adds after conv2 and after both FC layers; the conv1 bias add is still applied */
+void testLenetArch3() {
+  // LeNet-style network in which only the first convolution is followed by a bias add.
+  printf("********* Lenet Architecture ********** \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 batch_size, 1, 28, 28);
+  // Filter descriptors carry no batch dimension.
+  // Their first two dims are output channels (configurable) and input channels (must match the input).
+  // The output channel counts follow the trained model, not the LeNet variant from Andrew Ng's class.
+  void* conv1_w = readTrainedWeights("../model_params/lenet_params/conv1.bin",
+                                     CUDNN_DATA_FLOAT, 20, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin",
+                                     CUDNN_DATA_FLOAT, 1, 20, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet_params/conv2.bin",
+                                     CUDNN_DATA_FLOAT, 50, 20, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin",
+                                     CUDNN_DATA_FLOAT, 1, 50, 1, 1);  // loaded, but not used below
+  void* fc1_w = readTrainedWeights("../model_params/lenet_params/ip1.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 800, 500);
+  void* fc2_w = readTrainedWeights("../model_params/lenet_params/ip2.bin",
+                                   CUDNN_DATA_FLOAT, 1, 1, 500, 10);
+
+  /* Parameters shared by both convolutions */
+  int mode = 1;       // CROSS_CORRELATION
+  int precision = 0;  // float compute precision. FIXIT: use an enum
+
+  void* conv1 = tensorConvolution(images, conv1_w, 0, 0, 1, 1,
+                                  mode, precision);
+  // For tensorAdd the only dimension that must match is channels.
+  tensorAdd(conv1, conv1_b);  // in-place: the sum lands in conv1
+  printTensorDims(conv1);
+
+  void* pool1 = tensorPooling(conv1, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool1);
+
+  // Input channels have to match between tensor op inputs and outputs.
+  void* conv2 = tensorConvolution(pool1, conv2_w, 0, 0, 1, 1,
+                                  mode, precision);
+  printTensorDims(conv2);
+
+  void* pool2 = tensorPooling(conv2, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool2);
+
+  void* fc1_mm = tensorGemm(pool2, fc1_w);
+  printTensorDims(fc1_mm);
+
+  void* fc1_act = tensorRelu(fc1_mm);
+  printTensorDims(fc1_act);
+
+  void* fc2_mm = tensorGemm(fc1_act, fc2_w);
+  printTensorDims(fc2_mm);
+
+  void* probs = tensorSoftmax(fc2_mm);
+  printTensorDims(probs);
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", batch_size, probs);
+  // OPEN QUESTION: the accuracy comparison probably does not need to be part of the HPVM graph.
+  printf("END of Lenet Arch3 -- \n");
+}
+
+
+int main(){
+  // Set up the tensor runtime before running any test.
+  llvm_hpvm_initTensorRt(0);
+
+  //testTensorAdd();
+  //testTensorConv();
+  //testTensorPool();
+  //testTensorGemm();
+  //testTensorGemmBias();
+  //testTensorRelu();
+  //testTensorSoftmax();
+
+  //testLenetArch();
+  testLenetArch2();
+  //testLenetArch3();
+  llvm_hpvm_cleanupTensorRt();  // release the runtime, as the FC network drivers do
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..77f256320e158fb13555e83d0fbe260ce9d3a83f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_lenet2.cc
@@ -0,0 +1,111 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testLenet2Arch() {
+  // Lenet-2: two conv + bias + ReLU + pool stages, two FC layers with bias, then softmax.
+  printf("********* Lenet-2 Architecture ********** \n");
+  // The batch size is set just below; the whole batch goes through the network in one pass.
+
+  int batch_size = 10000;
+  void* images = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
+                                 CUDNN_DATA_FLOAT,
+                                 batch_size, 1, 28, 28);
+
+  // Filter descriptors carry no batch dimension.
+  // Their first two dims are output channels (configurable) and input channels (must match the input).
+  // The output channel counts follow the trained model, not the LeNet variant from Andrew Ng's class.
+  void* conv1_w = readTrainedWeights("../model_params/lenet2_params/conv1.bin",
+                                     float_type, 32, 1, 5, 5);
+  void* conv1_b = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin",
+                                     float_type, 1, 32, 1, 1);
+  void* conv2_w = readTrainedWeights("../model_params/lenet2_params/conv2.bin",
+                                     float_type, 64, 32, 5, 5);
+  void* conv2_b = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin",
+                                     float_type, 1, 64, 1, 1);
+  void* fc1_w = readTrainedWeights("../model_params/lenet2_params/fc1.bin",
+                                   float_type, 1, 1, 7*7*64, 1024);
+  void* fc1_b = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin",
+                                   float_type, 1, 1024, 1, 1);
+  void* fc2_w = readTrainedWeights("../model_params/lenet2_params/fc2.bin",
+                                   float_type, 1, 1, 1024, 10);
+  void* fc2_b = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin",
+                                   float_type, 1, 10, 1, 1);
+
+
+  // Power and performance profiling starts here and is closed by stopProfiling() below.
+  startProfiling();
+
+  int mode = 1;       // CROSS_CORRELATION
+  int precision = 0;  // float compute precision. FIXIT: use an enum
+
+  // 'SAME' convolution
+  void* conv1 = tensorConvolution(images, conv1_w, 2, 2, 1, 1,
+                                  mode, precision);
+
+  // For tensorAdd the only dimension that must match is channels.
+  tensorAdd(conv1, conv1_b);  // in-place: the sum lands in conv1
+  printTensorDims(conv1);
+
+  void* conv1_act = tensorRelu(conv1);
+  //dumpWeightsToFile("tensors_out/conv1.out", conv1_act);
+
+  void* pool1 = tensorPooling(conv1_act, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool1);
+  // Input channels have to match between tensor op inputs and outputs.
+  void* conv2 = tensorConvolution(pool1, conv2_w, 2, 2, 1, 1,
+                                  mode, precision);
+  tensorAdd(conv2, conv2_b);  // in-place: the sum lands in conv2
+  printTensorDims(conv2);
+
+  void* conv2_act = tensorRelu(conv2);
+  //dumpWeightsToFile("tensors_out/conv2.out", conv2_act);
+
+  void* pool2 = tensorPooling(conv2_act, 0, 2, 2, 0, 0, 2, 2);
+  printTensorDims(pool2);
+  //dumpWeightsToFile("tensors_out/maxpool2.out", pool2);
+
+  void* fc1_mm = tensorGemmGPU(pool2, fc1_w);
+  printTensorDims(fc1_mm);
+  //dumpWeightsToFile("tensors_out/gemm1.out", fc1_mm);
+
+  void* fc1_biased = tensorAdd(fc1_mm, fc1_b);
+  printTensorDims(fc1_biased);
+
+  void* fc1_act = tensorRelu(fc1_biased);
+  printTensorDims(fc1_act);
+
+  void* fc2_mm = tensorGemmGPU(fc1_act, fc2_w);
+  printTensorDims(fc2_mm);
+
+  void* fc2_biased = tensorAdd(fc2_mm, fc2_b);
+  printTensorDims(fc2_biased);
+
+  void* probs = tensorSoftmax(fc2_biased);
+  printTensorDims(probs);
+
+  // Profiling ends here; the output is dumped to profile.txt.
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
+                  batch_size, probs);
+  // OPEN QUESTION: the accuracy comparison probably does not need to be part of the HPVM graph.
+}
+
+
+int main(){
+  // Set up the tensor runtime before running the test.
+  llvm_hpvm_initTensorRt(0);
+
+  testLenet2Arch();
+  llvm_hpvm_cleanupTensorRt();  // release the runtime, as the FC network drivers do
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ee4cc95d411885a8adb5a7d3ec0ed9646d7b005b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
@@ -0,0 +1,425 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+
+void testTensorGemm(){
+  // CPU GEMM smoke test: a custom-filled lhs times an all-ones rhs, followed by a bias add.
+  printf("***** TensorSgemm ***** \n\n");
+  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
+  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
+  fillTensorWithOnes(lhs);
+
+  float* data_arr = (float*) lhs->host_data;
+  for(int i = 0; i < lhs->num_elems; i++){
+    data_arr[i] = (i / 4) + 1;  // integer division: every run of 4 elements shares one value
+  }
+  // Data type first, then tensor format -- the same argument order used for lhs above.
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 3);
+  fillTensorWithOnes(rhs);
+
+  void* output = tensorGemmCPU(lhs, rhs);
+  printTensorValues(output);
+
+  void* bias_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
+  struct Tensor* bias = (struct Tensor*) bias_ptr;
+  fillTensorWithOnes(bias);
+
+  float* bias_arr = (float*) bias->host_data;
+  for(int i = 0; i < bias->num_elems; i++){
+    bias_arr[i] = i + 1;  // bias values 1, 2, 3, ...
+  }
+
+  void* output2 = tensorAdd(output, bias);
+  printTensorValues(output2);
+}
+
+
+void testTensorHgemm(){
+  // tensorHalfGemm smoke test: a custom-filled lhs times an all-ones rhs, followed by a bias add.
+  printf("***** TensorHgemm ***** \n\n");
+  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
+  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
+  fillTensorWithOnes(lhs);
+
+  float* data_arr = (float*) lhs->host_data;
+  for(int i = 0; i < lhs->num_elems; i++){
+    data_arr[i] = (i / 4) + 1;  // integer division: every run of 4 elements shares one value
+  }
+  // Data type first, then tensor format -- the same argument order used for lhs above.
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 3);
+  fillTensorWithOnes(rhs);
+
+  void* output = tensorHalfGemm(lhs, rhs);
+  printTensorValues(output);
+
+  void* bias_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
+  struct Tensor* bias = (struct Tensor*) bias_ptr;
+  fillTensorWithOnes(bias);
+
+  float* bias_arr = (float*) bias->host_data;
+  for(int i = 0; i < bias->num_elems; i++){
+    bias_arr[i] = i + 1;  // bias values 1, 2, 3, ...
+  }
+
+  void* output2 = tensorAdd(output, bias);
+  printTensorValues(output2);
+}
+
+
+void testTensorHgemm2(){
+  // Larger tensorHalfGemm run (10000x800 lhs, 800x800 rhs dims); the result is not printed.
+  printf("***** TensorHgemm ***** \n\n");
+  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+				 10000, 800, 1, 1);
+  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
+
+  float* data_arr = (float*) lhs->host_data;
+  for(int i = 0; i < lhs->num_elems; i++){
+    data_arr[i] = (i / 4) + 1;  // integer division: every run of 4 elements shares one value
+  }
+  // Data type first, then tensor format -- the same argument order used for lhs above.
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+			     1, 1, 800, 800);
+  fillTensorWithOnes(rhs);
+
+  void* output = tensorHalfGemm(lhs, rhs);
+  //printTensorValues(output);
+}
+
+
+void testTensorSgemm2(){
+  // Larger tensorGemmGPU run (10000x800 lhs, 800x800 rhs dims); the result is not printed.
+  printf("***** TensorSgemm ***** \n\n");
+  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+				 10000, 800, 1, 1);
+  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
+
+  float* data_arr = (float*) lhs->host_data;
+  for(int i = 0; i < lhs->num_elems; i++){
+    data_arr[i] = (i / 4) + 1;  // integer division: every run of 4 elements shares one value
+  }
+  // Data type first, then tensor format -- the same argument order used for lhs above.
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+			     1, 1, 800, 800);
+  fillTensorWithOnes(rhs);
+
+  void* output = tensorGemmGPU(lhs, rhs);
+  //printTensorValues(output);
+}
+
+
+
+void testTensorGemmGPU(){
+  // GPU GEMM smoke test: a custom-filled lhs times an all-ones rhs, followed by a bias add.
+  printf("***** TensorSgemm ***** \n\n");
+  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
+  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
+  fillTensorWithOnes(lhs);
+
+  float* data_arr = (float*) lhs->host_data;
+  for(int i = 0; i < lhs->num_elems; i++){
+    data_arr[i] = (i / 4) + 1;  // integer division: every run of 4 elements shares one value
+  }
+  // Data type first, then tensor format -- the same argument order used for lhs above.
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 3);
+  fillTensorWithOnes(rhs);
+
+  void* output = tensorGemmGPU(lhs, rhs);
+  printTensorValues(output);
+
+  void* bias_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
+  struct Tensor* bias = (struct Tensor*) bias_ptr;
+  fillTensorWithOnes(bias);
+
+  float* bias_arr = (float*) bias->host_data;
+  for(int i = 0; i < bias->num_elems; i++){
+    bias_arr[i] = i + 1;  // bias values 1, 2, 3, ...
+  }
+
+  void* output2 = tensorAdd(output, bias);
+  printTensorValues(output2);
+
+}
+
+
+
+void testTensorGemmBias() {
+  // Feeds an all-ones 2x1x2x2 input and an all-ones 1x4 bias to tensorGemmBias and prints the result.
+  // NOTE: the bias's 2nd dim must equal d2*d3*d4 of the input tensor (here 4 == 1*2*2).
+  printf("***** TensorGemmBias ***** \n\n");
+  void* acts = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);
+  fillTensorWithOnes(acts);
+  void* bias_row = create2DTensor(CUDNN_DATA_FLOAT, 1, 4);
+  fillTensorWithOnes(bias_row);
+
+  void* biased = tensorGemmBias(acts, bias_row);
+  printTensorValues(biased);
+}
+
+
+
+void testTensorConv2() {
+  // Splits the input and the filters in two, convolves each pair, then concatenates and prints the result.
+  int mode = 1;       // CROSS_CORRELATION mode
+  int precision = 0;  // floating point precision
+
+  void* acts = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3);
+  fillWithOnesAndTwos(acts);
+  void** act_halves = tensorSplit(acts, 2, 1);
+
+  void* filters = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);
+  fillTensorWithOnes(filters);
+
+  void** filter_halves = tensorSplit(filters, 2, 0);
+
+  void* out_a = tensorConvolution(act_halves[0], filter_halves[0], 0, 0,
+                                  1, 1, mode, precision);
+  printTensorDims(out_a);
+
+  void* out_b = tensorConvolution(act_halves[1], filter_halves[1], 0, 0,
+                                  1, 1, mode, precision);
+  printTensorDims(out_b);
+
+  // Gather both partial results so they can be handed to tensorConcat as one array.
+  void* parts[2] = {out_a, out_b};
+
+  // NOTE(review): the trailing (2, 1) are presumably piece count and concat dimension -- confirm.
+  void* joined = tensorConcat(parts, 2, 1);
+  printTensorDims(joined);
+  printTensorValues(joined);
+
+}
+
+
+
+void testTensorConv3() {
+  // Split convolution with the trained AlexNet conv2 filters; dumps the concat and bias-add outputs to files.
+  int mode = 1;       // CROSS_CORRELATION mode
+  int precision = 0;  // floating point precision
+
+  void* acts = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 96, 28, 28);
+  fillTensorWithOnes(acts);
+  void** act_halves = tensorSplit(acts, 2, 1);
+
+  void* filters = readTrainedWeights("../alexnet/params/conv2.bin",
+                                     CUDNN_DATA_FLOAT, 256, 48, 5, 5);
+
+  void** filter_halves = tensorSplit(filters, 2, 0);
+
+  void* out_a = tensorConvolution(act_halves[0], filter_halves[0], 2, 2,
+                                  1, 1, mode, precision);
+  printTensorDims(out_a);
+
+  void* out_b = tensorConvolution(act_halves[1], filter_halves[1], 2, 2,
+                                  1, 1, mode, precision);
+  printTensorDims(out_b);
+
+  // Gather both partial results so they can be handed to tensorConcat as one array.
+  void* parts[2] = {out_a, out_b};
+
+  // NOTE(review): the trailing (2, 1) are presumably piece count and concat dimension -- confirm.
+  void* joined = tensorConcat(parts, 2, 1);
+  printTensorDims(joined);
+  //printTensorValues(joined);
+  dumpWeightsToFile("tensors_out/conv2_test.out", joined);
+
+  void* bias = readTrainedWeights("../alexnet/params/conv2.bias.bin",
+                                  CUDNN_DATA_FLOAT, 1, 256, 1, 1);
+  void* biased = tensorAdd(joined, bias);
+  printTensorDims(biased);
+
+  dumpWeightsToFile("tensors_out/conv2_bias_test.out", biased);
+
+}
+
+
+// Runs tensorLRN on two all-ones inputs with different alpha values and passes
+// each result to dumpWeightsToFile under tensors_out/.
+void testLRN(){
+
+  void* in_large = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 20, 20, 20, 20);
+  fillTensorWithOnes(in_large);
+
+  // First configuration: small alpha
+  unsigned window = 5;
+  double alpha = 2e-05;
+  double beta = 0.75;
+  double k = 1.0;
+  printf("LRN_alpha = %f \n", alpha);
+
+  // TEST-point - Compare TF vs CUDNN
+  void* out_large = tensorLRN(in_large, window, alpha, beta, k);
+  printTensorDims(out_large);
+  dumpWeightsToFile("tensors_out/lrn1_test.out", out_large);
+
+  void* in_small = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 7, 7, 7, 7);
+  fillTensorWithOnes(in_small);
+
+  // Second configuration: alpha scaled by the window size (0.5 * 5 = 2.5);
+  // window, beta and k keep their values from the first run.
+  alpha = 0.5 * window;
+
+  void* out_small = tensorLRN(in_small, window, alpha, beta, k);
+  printTensorDims(out_small);
+  dumpWeightsToFile("tensors_out/lrn2_test.out", out_small);
+}
+
+
+
+
+// Exercises tensorAdd twice; the first operand is printed after each call,
+// so the test relies on tensorAdd updating it in place (TODO confirm).
+void testTensorAdd(){
+  // Case 1: both operands have identical dimensions (2x2x2x2)
+  void* lhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2);
+  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2);
+  fillTensorWithOnes(lhs);
+  fillTensorWithOnes(rhs);
+
+  printTensorValues(lhs);
+  printTensorValues(rhs);
+  tensorAdd(lhs, rhs);
+  printTensorValues(lhs);
+
+  // Case 2: operands match only in the channel dimension (1x2x2x2 + 1x2x1x1)
+  void* lhs_chan = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 2, 2);
+  void* bias_chan = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 1, 1);
+  fillTensorWithOnes(lhs_chan);
+  fillTensorWithOnes(bias_chan);
+
+  tensorAdd(lhs_chan, bias_chan);
+  printTensorValues(lhs_chan);
+}
+
+
+// Sets every element of a 2x2x2x128 tensor to 0.2 through its host buffer,
+// calls tensorAddError(x, 3), and prints the tensor afterwards.
+void testTensorError(){
+  void* x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 128);
+  fillTensorWithOnes(x);
+
+  // Overwrite the ones with a constant directly through host_data
+  Tensor* tensor = (Tensor*) x;
+  float* host_vals = (float*) tensor->host_data;
+  for(int idx = 0; idx < tensor->num_elems; ++idx){
+    host_vals[idx] = 0.2;
+  }
+  tensorAddError(x, 3);
+  printTensorValues(x);
+}
+
+
+// Convolves a 1x1x4x4 all-ones input with a 3x1x2x2 all-ones filter, then
+// adds a 1x3x1x1 bias; the output tensor is printed after each step.
+void testTensorConv(){
+  // NOTE: input and filter must agree on the input-channel count (2nd dim)
+  void* in_tensor = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 4);
+  // NOTE: filter dims start with (output channels, input channels) - no batch size
+  void* filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, 1, 2, 2);
+  fillTensorWithOnes(in_tensor);
+  fillTensorWithOnes(filter);
+
+  int conv_mode = 1;          // NOTE: uses CROSS_CORRELATION
+  int compute_precision = 0;  // floating point precision for conv
+
+  void* conv_out = tensorConvolution(in_tensor, filter, 0, 0,
+                                     1, 1, conv_mode, compute_precision);
+  printTensorValues(conv_out);
+
+  // NOTE: For cudnnTensorAdd, the only dimension that MUST match is channels
+  void* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
+  fillTensorWithOnes(bias);
+  tensorAdd(conv_out, bias);
+  printTensorValues(conv_out);
+}
+
+
+// Runs tensorPooling on a 2x1x4x4 all-ones tensor and prints the result.
+void testTensorPool(){
+  void* in_tensor = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 4, 4);
+  fillTensorWithOnes(in_tensor);
+  printTensorValues(tensorPooling(in_tensor, 0, 2, 2, 0, 0, 1, 1));
+}
+
+
+
+// Applies tensorRelu to a 2x1x2x2 tensor filled via fillTensorWithNegOnes and
+// prints the output.
+void testTensorRelu(){
+
+  printf("***** TensorRelu ***** \n\n");
+  void* in_tensor = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);
+  fillTensorWithNegOnes(in_tensor);
+
+  printTensorValues(tensorRelu(in_tensor));
+}
+
+
+// Runs tensorSoftmax on a 2x4x1x1 tensor whose eight host-side values are set
+// by hand, then prints the output.
+void testTensorSoftmax(){
+
+  printf("***** TensorSoftmax ***** \n\n");
+  void* in_tensor = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 4, 1, 1);
+
+  // Two groups of four values, written straight into the host buffer
+  const float init_vals[8] = {0.1, 0.2, 0.3, 0.4,
+                              0.5, 0.6, 0.7, 2.5};
+  float* host_vals = (float*) ((struct Tensor*) in_tensor)->host_data;
+  for(int idx = 0; idx < 8; idx++){
+    host_vals[idx] = init_vals[idx];
+  }
+
+  void* softmax_out = tensorSoftmax(in_tensor);
+  printTensorValues(softmax_out);
+}
+
+
+// Prints the per-row sum of a (batch x channels) float tensor read from
+// host_data; output_ptr must point to a struct Tensor. Indices are size_t to
+// match the dimension variables and avoid signed/unsigned comparisons.
+void testSoftmaxOutput(void* output_ptr){
+  struct Tensor* output = (struct Tensor*) output_ptr;
+
+  size_t batch_dim = output->dims.dim_sizes[0];
+  size_t channels = output->dims.dim_sizes[1];
+  float* data = (float*) output->host_data;
+  for(size_t i = 0; i < batch_dim; i++){
+    float sum = 0.0f;
+    for(size_t j = 0; j < channels; j++){
+      sum += data[i * channels + j];
+    }
+    printf("output_sum = %f \n", sum);
+  }
+}
+
+
+
+
+int main(){
+  // Unit-test driver: initialize the runtime, then run the enabled tests.
+  llvm_hpvm_initTensorRt(0);
+
+  startProfiling();
+  // Enabled tests run between startProfiling() and stopProfiling()
+  testTensorHgemm2();
+  testTensorSgemm2();
+  testTensorConv();
+  testTensorError();
+  // Disabled tests: uncomment a line to run that test as well
+  //testTensorGemm();
+  //testTensorGemmGPU();
+  //testTensorGemmBias();  
+  //testTensorConv2();
+  //testTensorConv3();
+  //testLRN();
+
+  stopProfiling();
+
+  return 0;
+}
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7ab357e4ac6e8b6550a71b14ce73c79e20879cf3
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/tuning_src/fc_network_acc.cc
@@ -0,0 +1,108 @@
+
+
+#include <stdio.h>
+#include <cstdlib>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+#include "../include/types.h"
+#include "../include/op_overheads.h"
+
+
+
+
+/* NOTE: Reference Architecture to use for profiling */
+void testFCNetworkArch(int op1_acc, int op2_acc, int op3_acc,
+                       int op4_acc, int op5_acc, int op6_acc, int op7_acc){
+  // Two-layer fully connected network (784 -> 128 -> 10) evaluated on 10000
+  // inputs. After every tensor op, tensorAddError is applied with that op's
+  // knob (op1_acc..op7_acc) and the norms/overheads are recorded.
+  printf("********* Fully Connected DNN-1 - Accuracy Tuned ********* \n");
+
+  int test_batch_size = 10000;
+  void* images = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
+                                    float_type, test_batch_size, 1, 28, 28);
+  void* w1 = readTrainedWeights("../model_params/FC_network2/fc1.bin",
+                                float_type, 1, 1, 784, 128);
+  void* b1 = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
+                                float_type, 1, 128, 1, 1);
+  void* w2 = readTrainedWeights("../model_params/FC_network2/fc2.bin",
+                                float_type, 1, 1, 128, 10);
+  void* b2 = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
+                                float_type, 1, 10, 1, 1);
+
+  // Start execution profiling Tensor ops
+  startProfiling();
+
+  // Layer-1: GEMM (knob op1_acc)
+  void* gemm1 = tensorGemmGPU(images, w1);
+  void* error_norms = tensorAddError(gemm1, op1_acc);
+  add_norms(error_norms);
+  add_gemm_overheads(images, w1, op1_acc);
+  // Layer-1: bias add (knob op2_acc)
+  void* biased1 = tensorAdd(gemm1, b1);
+  error_norms = tensorAddError(biased1, op2_acc);
+  add_norms(error_norms);
+  add_bias_overheads(biased1, op2_acc);
+  // Layer-1: ReLU (knob op3_acc)
+  void* relu1 = tensorRelu(biased1);
+  error_norms = tensorAddError(relu1, op3_acc);
+  add_norms(error_norms);
+  add_relu_overheads(relu1, op3_acc);
+
+  // Layer-2: GEMM (knob op4_acc)
+  void* gemm2 = tensorGemmGPU(relu1, w2);
+  error_norms = tensorAddError(gemm2, op4_acc);
+  add_norms(error_norms);
+  add_gemm_overheads(relu1, w2, op4_acc);
+  // Layer-2: bias add (knob op5_acc)
+  void* biased2 = tensorAdd(gemm2, b2);
+  error_norms = tensorAddError(biased2, op5_acc);
+  add_norms(error_norms);
+  add_bias_overheads(biased2, op5_acc);
+  // Layer-2: ReLU (knob op6_acc)
+  void* relu2 = tensorRelu(biased2);
+  error_norms = tensorAddError(relu2, op6_acc);
+  add_norms(error_norms);
+  add_relu_overheads(relu2, op6_acc);
+  // NOTE(review): softmax overhead is recorded via add_bias_overheads - confirm
+  void* result = tensorSoftmax(relu2);
+  error_norms = tensorAddError(result, op7_acc);
+  add_norms(error_norms);
+  add_bias_overheads(result, op7_acc);
+
+  stopProfiling();
+
+  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
+
+  dump_result("accuracy_summary");
+}
+
+
+
+// Usage: <binary> op1 op2 op3 op4 op5 op6 op7  (seven integer accuracy knobs)
+int main(int argc, char* argv[]){
+
+  const int num_knobs = 7;
+  if(argc <= num_knobs){
+    printf("Must provide 7 knobs for accuracy tuning \n");
+    abort();
+  }
+
+  // This initializes the runtime - must be called before anything
+  llvm_hpvm_initTensorRt(0);
+
+  // knobs[k] holds the value parsed from argv[k + 1]
+  int knobs[num_knobs];
+  for(int k = 0; k < num_knobs; k++){
+    knobs[k] = atoi(argv[k + 1]);
+  }
+
+  testFCNetworkArch(knobs[0], knobs[1], knobs[2],
+                    knobs[3], knobs[4], knobs[5], knobs[6]);
+
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17b5b1e6bfbccd08a42fdf7ee241a7742e764ffb
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5187944d335d316a3d5a4015d7da69e425878347
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fe15f7c890cdc9c6e1afd83dc50b8c1308a55dcc
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3c7278911331c715598268586202b1d95aa5ef58
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a11cdfff7f2e6f1f70fc37e8c0da9b3997116f27
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..493f78d98eec57da2bb3004079a64f0584ea60d9
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc3_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4f940102eb8a17051c44e8fe12a6b7730a61c15d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fd3305efb194f19475cb0a260f845efc8bd986e7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/FC_network1/fc4_bias.bin
@@ -0,0 +1 @@
+îˆ¿Ç›±¿žÓ?Ü‚?Sˆ¾6Í¾bZö>c÷”¿¡™Ê<`–i?
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d24151936ec9c89e260439d55edf42d2dc55723f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9cca044999ee589bbfdfada84db040751559cd26
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7fc42c720505d595c80dc426cc739dcdc5e5c7e2
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eb2e7856b366465f0574ad68d71c88432d021b27
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/fc2_bias.bin
@@ -0,0 +1 @@
+°F¾Êgw>I‹q<?N½»ƒ¾Ò…¹>	„m½É > Ý†¾ÝH/½
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin
new file mode 100644
index 0000000000000000000000000000000000000000..779dcf7f6ad72f3e22d5c96148d2f0f7e11e39b8
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network2/mnist_float_input.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a2acb0bdfd5b2073eaad1ec51b99faf5a60ae07
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6b20a053997e30f768995fbf4f27ab6b04f8403f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dd955fc10e8727ed4bf41100f7b74d2026d0cad6
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e225b78da4cf02f9500020edbd2a6f68a03ab7aa
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d3b5c08ee1330f8f33d24f12aad8c78437a54a28
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3a7f0b8f93603a474ac3ef23a48b550de1e2327
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc3_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f92c14bde53bd000f8d3f7993373fe6797dd4921
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6459c295b4dc106194c17ce55c363caded6b8bc8
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/FC_network3/fc4_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bc4e57ac0a859851103667d6eb8cc835b70e04de
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3acb43f03590d809b55df5a4cc264a1d4f8318ba
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ccac40a8f33803d941fa0041c8568ea589fdd945
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3aff63b21593ec7ebd0c04f41f151bad113cc2e4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/fc2_clipped/fc2_bias.bin
@@ -0,0 +1 @@
+ã¿ê¥>¿½×J?
ñ"¿Rùû¾8‰Š¼=ìß>qO¿ÂB?
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..be2731c02774a6ed6c554121cb507ca753b87144
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ea4933ab67c82010bde969df1028adb828c0a44d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d69299cae0826fc57a32a3bc389bcf25603d9bc6
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1001ecf3ac16b388f4619d12bfeab4ca7db3e726
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dd97ce049b50c71430f4db63219931e168e05515
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..460b7fe40bd60902338b99aed2446ed746c1c8ea
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/fc3_clipped/fc3_bias.bin
@@ -0,0 +1 @@
+8I¿˜jc?—ì>¢_Ñ½DÒ=b"¿Hý½ÊMõ>	xo¿3*?
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..56ec40072906c21991c9bee2985651f6fadeaba0
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..421dd8c3782c2acfe019530565c2855cdccd3bf0
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf18f1c5d631ce9734cfc067b503c7d9484fd1ba
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7fce451559ed38bfa5d642f6df6d13e270a49c4c
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..57938a2e2db17982f9646ea57cb2551c41f1ce8f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f5ae2b4abd77063bd80cda4a9321c62d5a42070b
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc3_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7ed4b5a50917b127351dc7a673a8c87ac8ddedd6
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0633a1a2bf444ecbe6e0b5896c92b72a5fc7ecdb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/fc4_clipped/fc4_bias.bin
@@ -0,0 +1 @@
+Éìù¾°ŸÐ>-¾¤‹ó¾}*&¿ñf¿C/˜¾i¤à½v”=?®éJ¾
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..670261107ead208daa54e1471a1818b098315ee7
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e4469ef562dae74e197fa44fb8ac762ac5c4a288
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv1_bias.bin
@@ -0,0 +1,3 @@
+òÙ¼W½Bì;Çz$ k'a=a¸î½`Ôƒ¸v£Žµ¤ÿ¾†	W¾ƒLl½ÇÄž=?š¿Zîß=<ò!¿x”"¿Ž!”‹Á
+8½à’¼u¶¾dì¿ƒ]¼Ffr¾ZÅ×½#¶½^žº±Ø½j5¼€)
+¿æc4¿lÿå¾uú½
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e03b76e819f700dc0e5a23919e1ec277ec4774b
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a8053c9575fcff5134dbb921accaf37fd294e46
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/conv2_bias.bin
@@ -0,0 +1,2 @@
+œ§Y¾¨ü¸¾ Ñ¼£2ß¸Z—‹Ê÷˜,V5=]±¾¿ë„¾¨çE§
+¨¹²Z`¾MÑÏ¾È¼ß–‹U”‚¾"Rƒ<2À¾4Áª»íž)=L¹E¬Ç …½[ÚA¾‰	¸½!âs½]1¼›²#¾o"Ÿ†u=]Öéº-ìý½”*»A9ú½¼ö½¾d6Š<ˆ1æ‹Ìf¾c«¬<>R“‹MŽ¾K®½Mÿù½j¾-½O™…¾Ì8!½ßò"¾Dqú¹Œ|¢¾Cæ.½öýZ¾»8\¾p½‹Ó-Î½¯*÷½}'r½ú'“»e›”‹5îÂ¼)/½>¤>bÓÏ«Öaô¤¶'¾ØÞ¿¾
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6e27b6e9147a65e1db3477bfda0a0a1274783215
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a28db7e3aeb6e2892d31122603a83667995da874
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..01774f984fdf39eaeba5023caa01fe203046667f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d59c5dee69dffdc9ca49a0707244595074e1a471
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_clipped/fc2_bias.bin
@@ -0,0 +1,2 @@
+h.6=)¿†|’¾ïë
+¢_µ'>;‚n¼¤Oò¼6T®=t#M¾D{°¾
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..202d11adb809f2035db1d4d09806661b8bd978cb
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..11ac18222cac00e38c809f5f132824e1000ccb50
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bea1dfe0079e8fb80730620cad4859f5e2baaa9a
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f537eb0cd2ec3847bcb90fab8bb5025157097b1e
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/conv2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c43543bb447c31e60545358929f4df460a1b0d9f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..01bd440f4f16036b2ceafbbb5629ee02082ed82d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7b51cb239a1423432059bd84feee57f70068d1fe
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..76535beffd242bfe579ea55cf82e80c60d871c96
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_params/fc2_bias.bin
@@ -0,0 +1 @@
+°Ï=ô ?«~u¿£q½;ƒÀª=G›‚?3$¿"µq¿nTJ?'jE>
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1f67654d3ad5fd01f92efab6b7977ba43bdd523d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ef4abd162cf515ef3df6f9f10e0d281165d39ec
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv1_bias.bin
@@ -0,0 +1,3 @@
+óDž=ÀÏ¾	×ô»XC=Pñj>§.j=B(s»0óº¾ñ$,¼úrŽ¾
ËÄ¼\øÊ=TYh»Æ¦¸¾¦Cï=+ý¾½
+ÅºjÏ½.Ud¾d*>Q¹Ò¾âÝU¾cqC<óFÓ=°
+Í=k>É‘Î½Xõ¾˜Ç¾Å›S¾Ô<¾ps¼
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fc46877a1a81154f8c78f559c457a0f691289a48
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..42ee244812fd9c46f7208fd8396bf4dbf5df6197
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/conv2_bias.bin
@@ -0,0 +1,2 @@
+º†¾O=×Ä—>®¯¾š¡¾çÕ½i;H=
R—¾'†(>†Ÿ<#>wc\¾
2Ë¾<<í¼¨þ¾Á‡–=¢;ïºÃ†›½÷¼È<øx„>¸/H¾Šw<¬”_¾™¦Ê»øJF>4ñ<ÙµD>È8(=ºà=‹°°¾©ùP¾8½Ñ/É½É–ƒ¾¯åð<ñ€‚½¸¼½=]¾ƒl¾´ÊL½Ù§…¾ªûÕ<ÊTË¾¬>>_êŒ¾Á¾¾Ôa>tC>ÉÛ=‘ü”½Í¾)5D¾Þ‹X½Ô&>éP‹<Þöi¾‚[j¾1_Ã½î¾±ØO¾/¢=aÙ
+¾1Z\=ãÙŽ«
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c9dd2a8c001e075b52feffcd2538107a08c74932
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..77d4c31025156e35069533ef65623a6b019962df
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..92c9f125a58d324947f20b3d41754fd491c2ac63
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d4cb460055bfd675b1c576c7224d00b362d1aa7f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh/fc2_bias.bin
@@ -0,0 +1,2 @@
+ì@÷=nÑ>óñ#>:–¡>)(Ó½Í-¤>$Úh>5¹½¥0„¾
+ê¿¾
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1f67654d3ad5fd01f92efab6b7977ba43bdd523d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ef4abd162cf515ef3df6f9f10e0d281165d39ec
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv1_bias.bin
@@ -0,0 +1,3 @@
+óDž=ÀÏ¾	×ô»XC=Pñj>§.j=B(s»0óº¾ñ$,¼úrŽ¾
ËÄ¼\øÊ=TYh»Æ¦¸¾¦Cï=+ý¾½
+ÅºjÏ½.Ud¾d*>Q¹Ò¾âÝU¾cqC<óFÓ=°
+Í=k>É‘Î½Xõ¾˜Ç¾Å›S¾Ô<¾ps¼
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fc46877a1a81154f8c78f559c457a0f691289a48
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..42ee244812fd9c46f7208fd8396bf4dbf5df6197
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/conv2_bias.bin
@@ -0,0 +1,2 @@
+º†¾O=×Ä—>®¯¾š¡¾çÕ½i;H=
R—¾'†(>†Ÿ<#>wc\¾
2Ë¾<<í¼¨þ¾Á‡–=¢;ïºÃ†›½÷¼È<øx„>¸/H¾Šw<¬”_¾™¦Ê»øJF>4ñ<ÙµD>È8(=ºà=‹°°¾©ùP¾8½Ñ/É½É–ƒ¾¯åð<ñ€‚½¸¼½=]¾ƒl¾´ÊL½Ù§…¾ªûÕ<ÊTË¾¬>>_êŒ¾Á¾¾Ôa>tC>ÉÛ=‘ü”½Í¾)5D¾Þ‹X½Ô&>éP‹<Þöi¾‚[j¾1_Ã½î¾±ØO¾/¢=aÙ
+¾1Z\=ãÙŽ«
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c9dd2a8c001e075b52feffcd2538107a08c74932
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..77d4c31025156e35069533ef65623a6b019962df
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..92c9f125a58d324947f20b3d41754fd491c2ac63
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d4cb460055bfd675b1c576c7224d00b362d1aa7f
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet2_tanh2/fc2_bias.bin
@@ -0,0 +1,2 @@
+ì@÷=nÑ>óñ#>:–¡>)(Ó½Í-¤>$Úh>5¹½¥0„¾
+ê¿¾
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..321259debd6ff4d1445edcadd69cc6de53f3f03d
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..484b86517f5496d0f97f7d5af2c0ad04b869be6e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv1_bias.bin
@@ -0,0 +1,2 @@
+;n^?(5à¾Ärp¾øgB¿±"ì¾Vø9¿û»=Z¾>EE¿Úýš>"[€½°0r¾v!Ä>³ð¢½M°!¿±TY¿yÕ_¾š-¾"~G?
5²¾Ë~ý¾yk&¿s2K¾#¢?e
+?«E¿w€=)X=4Z¾	,ö>/b¦>„+>
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..612cedbfad68ad210388bb7c1a9825e97e673872
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2994d417de60c4a04c671978c6cbff4c9457d9ef
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/conv2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..78f283aa48f4fb58fe3bc07a5320836107269596
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cbd06906d6c5a3bf16e45c01d8ccbc2338b20bc8
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..637b3538402d599668af1a90e405345cab4c45b7
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fab36f6c21745e478934d98b64ca1220d0c9fc0e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet3_params/fc2_bias.bin
@@ -0,0 +1 @@
+É#¨:»Od:É;[€<F ×¹ÀÒC·tZ¸x[Ÿ9ƒ…Û<Übˆº
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89ab6ad37cac94360f7f87c93676f353829f1deb
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a2a381337e13fe52959c838b4a2bedab3c3f8ab
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv1_bias.bin
@@ -0,0 +1 @@
+h4Q;¤ù;34¼j0_½G½–h;ìz/½ðÇÊ:àk¥¼{l½t+O;u¼8™¨¼d»”½®¼}8›<íO’¼äÕ¿»¤#½„ö¼”u<¼¿l…¼f¢;Ð4½ŠO ½>Øž¼7K¼04½ÎG:à'½ÔOF½M=;
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6cd00b88c5be6e212f2d3a37c8ea2a8edb1ceca7
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c0adf3e885ce855a0cc9d1b4b12f73665187159e
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/conv2_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..152c5bb0baae480f6b8d317889fc68f8d77247b6
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..58221f45cdc56049b2edc29c244ea9d797a87fb5
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..97d78a9610b15be285661c1d762026c9fa4100cb
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cbda59beef150dfbca756621286f042ec8e247bf
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_keras/fc2_bias.bin
@@ -0,0 +1 @@
+Ê%”½ùb½Ó„g½W•½$VÄ½éum½'Æµ½J§’½·¾¶½›¢½
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7536ef9f25e8fe7c7d47dac2857fe1cb291464d6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bias.bin
@@ -0,0 +1,2 @@
+åMS¾¶õÊ=9Øø=…¾kO§>¬¯¾8g¾šÍ®>€Jn=Ü‘Ñ½¥>
+”>ô+L>ä„—>a¹¥>Jº“¾B*3>‹èM>Û`>kßÅ<
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..48dabc33ff1ffc605aba73b34f884c2e43f23910
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..103ae6938d19b43b462c352d4c4d23c0bef7caaf
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ff4242bb8002c7e81e5655bfa197541da6a9921f
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte
new file mode 100644
index 0000000000000000000000000000000000000000..1170b2cae98de7a524b163fcc379ac8f00925b12
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-images-idx3-ubyte differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte
new file mode 100644
index 0000000000000000000000000000000000000000..d1c3a970612bbd2df47a3c0697f82bd394abc450
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a9f5de5084ae4506e610ebe7deba62de40f3e536
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..232032080ffe11e84977e84ebfde02c728ba2718
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..67e323754adf73f147a1776916d6f48b7fdd7782
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bias.bin
@@ -0,0 +1 @@
+Ž½ÒŠ;=-‰¼Çjz¼éœºYÕ¼—ˆ<X’ƒ¹ Mú¼œò•¼
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddde5fb3258d7abea7ece3fc0455e7532e4a30ee
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/ip2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin
new file mode 100644
index 0000000000000000000000000000000000000000..779dcf7f6ad72f3e22d5c96148d2f0f7e11e39b8
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_params/mnist_float_input.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c841ed3b821617f81fc8764830868e64713668db
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4566564e1ad5dc9a0231575ca33f752b53c24a7
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2304c792451e65d7a6f4615060dfc0c90164dc29
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08c01b1586c269269d8dc8951afb7cd0c02606b2
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/conv2_bias.bin
@@ -0,0 +1,2 @@
+è¾#¬¿ÀIº¾¹¾ë É¾¨9D¾èÍž¾£½fn»>_Ái>ú»«¾PØ>
+þv¾K,,¾az.¾&Ñ¾c…:¾EÂ¥>\Œ«>œöú½á†]½ÿ%]¾ö¸P>íi<>Ðû\¾¹ã¾åÀ)¾d€™>oÞ½«‡%>ŽÎ9½zNâ½È:>Ù˜\¾¦ºj>vP>ÿ›4>‚¾ÖDà¾õ§Ð¾¶õ¬>qS¾Œ'á¾:é;zb$>Àƒu>í9w>¦˜n½+Ò¾»‘…¾“ÐS½ª
>Ñ˜º½b¾#)¸¾’%e>=ä#<Íà¾Ã¾E‰2¾]«Y=r¦³½)*k¾ ,¿
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f59c44723443b40667340a60ae20311133c425a
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..24656c9753f9ab1b6d8b648f2fe7f3d6af24bebd
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1b567de77acfc62b54ec4a676df8256b07a6b127
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7c0eae24cc844613f616fda43cd444c5f506ebf
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/fc2_bias.bin
@@ -0,0 +1 @@
+µs¾Ó˜>P5>ù>bëÈ=_Ïú½ƒA‚>Äô}>\¾+Nè<
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c841ed3b821617f81fc8764830868e64713668db
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4566564e1ad5dc9a0231575ca33f752b53c24a7
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2304c792451e65d7a6f4615060dfc0c90164dc29
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08c01b1586c269269d8dc8951afb7cd0c02606b2
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/conv2_bias.bin
@@ -0,0 +1,2 @@
+è¾#¬¿ÀIº¾¹¾ë É¾¨9D¾èÍž¾£½fn»>_Ái>ú»«¾PØ>
+þv¾K,,¾az.¾&Ñ¾c…:¾EÂ¥>\Œ«>œöú½á†]½ÿ%]¾ö¸P>íi<>Ðû\¾¹ã¾åÀ)¾d€™>oÞ½«‡%>ŽÎ9½zNâ½È:>Ù˜\¾¦ºj>vP>ÿ›4>‚¾ÖDà¾õ§Ð¾¶õ¬>qS¾Œ'á¾:é;zb$>Àƒu>í9w>¦˜n½+Ò¾»‘…¾“ÐS½ª
>Ñ˜º½b¾#)¸¾’%e>=ä#<Íà¾Ã¾E‰2¾]«Y=r¦³½)*k¾ ,¿
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f59c44723443b40667340a60ae20311133c425a
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..24656c9753f9ab1b6d8b648f2fe7f3d6af24bebd
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1b567de77acfc62b54ec4a676df8256b07a6b127
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7c0eae24cc844613f616fda43cd444c5f506ebf
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh/lenet_tanh/fc2_bias.bin
@@ -0,0 +1 @@
+µs¾Ó˜>P5>ù>bëÈ=_Ïú½ƒA‚>Äô}>\¾+Nè<
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9c6404768d1262101afc967cd3b660a7e757cd25
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8335621803cf622c0724fd437623d9277efb458c
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..09c1e72f56a144675d48e5d4969e260100c35ada
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3d4d6e388f22cf825a1e8b434fd34080fc8912e8
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/conv2_bias.bin
@@ -0,0 +1,3 @@
+†¹œ½~Ä
+½Î"y½ô£½È½ès½eªf½tN¼ ¬½¨X½¨„?¼›þŸ¼ž¯½OÙ3½%§¼ß³H½†À
+½&½JO0½¶wž½¥:o½ðÐµ¼ý[Z½P½S‚³½ˆé?½Ä *½Š^[»`ì(½Æœ½þã½ÛV®¼¨ü˜½çtÆ½û¯¸º|Üv½®‰½ó®Ã¼@»¼ïÖŒ½
Hš¼ó‰»µh‹½ux½-N,½«Ú©½c†N»è¡Ž½µ¤@½ÈzÕ¼4™½.·!½ÐÊÀ¼ú®½c¬Ûº>.½xà½½á¼Oû½ý+;¼¿œ•½i‹š½éµ–¼ØGP½
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..36e6a84bfba394921e4ca50c2acaba1482ea0ae1
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..56442e581a16e7f76a46866274c0ea66ea8be086
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0ad7ae497969781128a6f98fd923655934fd217a
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4cb6d824d540d66502b5e7ab0157e567a2d1a300
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_tanh2/fc2_bias.bin
@@ -0,0 +1 @@
+©|½8Ý;ÿÄ‚½!´½!Î¶½ß[“½×ý–½$Ðm½0Œ¦½›V´½
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ca013bfaafd114694b2f83ecfc2d177fdb38990e
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bbb78f121d9c2a1ae219a45cc20539a990648186
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv1_bias.bin
@@ -0,0 +1 @@
+éÃ?”è¿åSª?*?‡%?’òJ¿a‰€¿CM]¾ ˆÎ>Íž¿Pú€?µ¿ûd»¾® y¿>‰?"™Õ?<ª¾|«ˆ¿Vr³¿~†?_!/?]Ú@B:L¿Ý¼Í>‡Å·¾<J„¾;–©?ek>rËß¾ "N?cPs¿st]¿
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..621b5259648bb00ade00273d73ed30f2dc0af52c
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..61ffbbc2ae7d92dd220e09ca1418898bf63f973e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/conv2_bias.bin
@@ -0,0 +1,2 @@
+s+á¾ F§=´ïH¿J›@*G¿Uý±¿-ôA>»¢¿tƒô½q¾Ö¯ƒ>ÒáÙ½lÒ?é…&½æ¾cq}¿ö¼½‹@c÷o=òp?p–‡?×˜?&j<ãR??&‚Ó¿w¥¿A…í¿%î>DXÁ¿ŽQ;@½¡£?µ1”?äßÀWp¿{è…¾ÙÌ¦¾-G¿"©ˆ¿vË¥¾8Fd¿ãÔó½‚%?ZnÕ¾7ÍM¿a·K¿ó“<9/?]/P?|‚H?y5˜¿.A?bï¿-øˆ¿A.Ù?<t¨½&òÀÅ3>óN’¿sÝ
+ÀF+è?‰;?,b«?©ýÎ¿‡ºæ>
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c8a16853b5dcf00cfaac58438ee28c8c9273b077
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3346319c7e19432d16e3eb471b53216fa6efb162
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc1_bias.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..19286ba29008b48b34409b963e92e8817d35e6e3
Binary files /dev/null and b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2.bin differ
diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin
new file mode 100644
index 0000000000000000000000000000000000000000..94a9ea8487c42b6b27d411ab678f64085b12fef4
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_test_params/fc2_bias.bin
@@ -0,0 +1 @@
+M‡“?#=e?N‹!¿‰ñs¿¸OÕ¿„@\?2±“¿j3?sÆ¢½º—o¿
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h# b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h#
new file mode 100644
index 0000000000000000000000000000000000000000..d474152fd80ecc90e07092795e513e2d97da0129
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/#error.h#
@@ -0,0 +1,627 @@
+
+#ifndef ERROR_HEADER
+#define ERROR_HEADER
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+#include <ctime>
+#include <cfloat>
+#include <algorithm>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <random>
+#include <string>
+#include <time.h>
+
+#include <curand.h>
+#include <curand_kernel.h>
+#include <math.h>
+#include <assert.h>
+
+
+#include "../include/debug.h"
+#include "tensor.h"
+#include "profiling.h"
+#include "tensor_utils.cu"
+#include "global_data.h"
+
+
+
+
+// Loads the per-operation accuracy knobs from `file_name` (whitespace-separated
+// integers) into the global `op_accuracies`, sets `total_ops` to the number of
+// knobs read and resets `op_counter`. A missing file leaves the list empty.
+void readOpenTunerFlags(char* file_name){
+  // Reset the global tuner state before (re)loading
+  total_ops = 0;
+  op_counter = 0;
+  op_accuracies.clear();
+
+  FILE* fp = fopen(file_name, "r");
+  if(fp == NULL){
+    ERROR("File 'opentuner_flags' not found \n");
+    // ERROR is not guaranteed to abort, so bail out explicitly: looping on a
+    // NULL stream never sees EOF, and fclose(NULL) is undefined behavior
+    return;
+  }
+
+  // Record only values that were really parsed. Testing for exactly one
+  // conversion stops on EOF and on non-numeric input (where fscanf returns 0
+  // forever) and avoids pushing an unparsed, stale value at end of file
+  int op_acc;
+  while(fscanf(fp, "%d", &op_acc) == 1){
+    op_accuracies.push_back(op_acc);
+    total_ops++;
+  }
+
+  fclose(fp);
+}
+
+
+
+
+
+/*__device__ inline void atomicAdd(float* address, float value)
+
+{
+
+  float old = value;
+  float new_old;
+
+  do{
+    new_old = atomicExch(address, 0.0f);
+    new_old += old;
+  }
+
+  while ((old = atomicExch(address, new_old))!=0.0f);
+
+};
+*/
+
+
+
+
+
+Norm_t* calculateNorms(Tensor* x, Tensor* x_orig){
+
+  deviceToHostCopy(x);
+  deviceToHostCopy(x_orig);
+
+  // NOTE: Move floats to doubles - overflow is quite possible
+  float l1_norm = 0.0;
+  float l2_norm = 0.0;
+  float inf_norm = -1.0;
+  double total = 0.0;
+
+  float* arr1 = (float*) x->host_data;
+  float* arr2 = (float*) x_orig->host_data;
+  
+  for(unsigned int i = 0; i < x->num_elems; i++){
+
+    total = total + arr2[i];
+    
+    float diff = abs(arr1[i] - arr2[i]);
+    l1_norm += diff;
+    l2_norm += (arr1[i] - arr2[i]) *  (arr1[i] - arr2[i]);
+
+    if(inf_norm < diff)
+      inf_norm = diff;
+  }
+
+  l1_norm = l1_norm / (x->num_elems * 1.0);
+  l2_norm = l2_norm / (x->num_elems * 1.0);
+
+  double distribution_mean = total / (x->num_elems * 1.0);
+  l1_norm = l1_norm / distribution_mean;
+  l2_norm = l2_norm / distribution_mean;
+
+    
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  norms->l1_norm = l1_norm;
+  norms->l2_norm = l2_norm;
+  norms->inf_norm = inf_norm;  
+  
+  INFO("l1_norm = %f \n", l1_norm);
+  INFO("l2_norm = %f \n", l2_norm);
+  INFO("inf_norm = %f \n", inf_norm);
+
+  return norms;
+}
+
+
+
+Norm_t* calculateNorms2(Tensor* x, Tensor* x_orig){
+
+  deviceToHostCopy(x);
+  deviceToHostCopy(x_orig);
+
+  // NOTE: Move all floats to doubles - overflow is quite possible
+  double l0_norm_A = 0.0;
+  double l0_norm_B = 0.0;
+
+  double l1_norm_A = 0.0;
+  double l1_norm_B = 0.0;
+  
+  double l2_norm_A = 0.0;
+  double l2_norm_B = 0.0;
+  float inf_norm = -1.0;
+  float orig_inf_norm = -1.0;
+  double total_diff = 0.0;
+  double total_diff_squared = 0.0;
+ 
+  float* arr1 = (float*) x->host_data;
+  float* arr2 = (float*) x_orig->host_data;
+  
+  for(unsigned int i = 0; i < x->num_elems; i++){
+
+    if(arr2[i] != 0.0)
+      l0_norm_A = l0_norm_A + 1.0;
+    if(arr1[i] != 0.0)
+      l0_norm_B = l0_norm_B + 1.0;
+        
+    l1_norm_A = l1_norm_A + abs(arr2[i]);
+    l1_norm_B = l1_norm_B + abs(arr1[i]);
+
+    l2_norm_A = l2_norm_A + (arr2[i] * arr2[i]);
+    l2_norm_B = l2_norm_B + (arr1[i] * arr1[i]);
+      
+    float diff = abs(arr1[i] - arr2[i]);
+    total_diff = total_diff + diff;
+    float diff_squared = diff * diff;
+    total_diff_squared = total_diff_squared + diff_squared; 
+
+
+    if(orig_inf_norm < diff){
+      orig_inf_norm = diff;
+    }
+    
+    // Relative difference value
+    float normalized_diff = diff / arr2[i];   
+    if(inf_norm < normalized_diff){
+      inf_norm = normalized_diff;
+    }    
+  }
+
+  // Relative L1 and Mean L1 norms of the difference Matrix
+  float mean_l1 = ( total_diff ) / x->num_elems;
+  float relative_l1 = ( total_diff ) / l1_norm_A;
+
+  // Computing Relative L2 norm - i.e., Euclidean distance
+  double norm_root_A = sqrt(l2_norm_A);
+  double diff_root = sqrt(total_diff_squared);
+  float mean_l2 = diff_root / x->num_elems;
+  float relative_l2 = diff_root / norm_root_A;
+
+  // Packing computed norms in Norm_t struct
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware
+  norms->mean_l1 = mean_l1;
+  norms->mean_l2 = mean_l2;
+  norms->orig_inf_norm = orig_inf_norm;
+
+  // Relative metrics (relative to distribution) - suitable for PROMISE
+  norms->l1_norm = relative_l1;
+  norms->l2_norm = relative_l2;
+  norms->inf_norm = inf_norm;  
+  
+  INFO("l1_norm = %f \n", relative_l1);
+  INFO("l2_norm = %f \n", relative_l2);
+  INFO("inf_norm = %f \n", inf_norm);
+
+  return norms;
+}
+
+
+
+
+
+// One thread per element: atomically accumulates |A[i]|, A[i]^2, |A[i]-B[i]|
+// and (A[i]-B[i])^2 into the four double accumulators. Threads whose global
+// index is >= n do nothing.
+__global__ void normComputeKernel(float* A, float * B, double* l1_A, double* l2_A,
+                                  double* l1_diff, double* l2_diff, unsigned int n){
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if(!(idx < n))
+    return;
+
+  float a = A[idx];
+  double abs_delta = fabsf(a - B[idx]);
+
+  atomicAdd( l1_A,  fabsf(a) );
+  atomicAdd( l2_A, (a * a) );
+  atomicAdd( l1_diff, abs_delta);
+  atomicAdd( l2_diff, abs_delta * abs_delta);
+}
+
+
+
+// Compute Norms on the GPU.
+// Same metrics as calculateNorms2 (relative to x_orig), except that the two
+// inf-norm fields are not computed and are reported as 0. Returns a
+// heap-allocated Norm_t owned by the caller.
+Norm_t* calculateNormsGPU(Tensor* x, Tensor* x_orig){
+
+  hostToDeviceCopy(x);
+  hostToDeviceCopy(x_orig);
+
+  // FIXIT: Move all floats to doubles - overflow is possible
+  // Accumulator slots: [0] sum|ref|, [1] sum ref^2, [2] sum|diff|, [3] sum diff^2
+  const int num_sums = 4;
+  double sums[num_sums] = {0.0, 0.0, 0.0, 0.0};
+  double* sums_d = NULL;
+
+  cudaMalloc( (void**) &sums_d, num_sums * sizeof(double));
+  // cudaMalloc does not clear memory and the kernel only ever adds into these
+  // slots, so they must start at zero (all-zero bytes are 0.0 for a double)
+  cudaMemset(sums_d, 0, num_sums * sizeof(double));
+
+  float* arr1 = (float*) x->gpu_data;
+  float* arr2 = (float*) x_orig->gpu_data;
+
+  int blockSize = 1024;
+  int gridSize = (int) ceil ((float) x->num_elems / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // The kernel's first array supplies the normalizing norms; pass the reference
+  // tensor there so the relative metrics agree with calculateNorms2
+  normComputeKernel<<<gridSize, blockSize>>>(arr2, arr1, sums_d, sums_d + 1,
+                                             sums_d + 2, sums_d + 3, x->num_elems);
+
+  // Blocking copy: also waits for the kernel above to finish
+  cudaMemcpy(sums, sums_d, num_sums * sizeof(double), cudaMemcpyDeviceToHost);
+  cudaFree(sums_d);  // the device accumulators were leaked on every call
+
+  double l1_norm_A = sums[0];
+  double l2_norm_A = sums[1];
+  double l1_diff = sums[2];
+  double l2_diff = sums[3];
+
+  // Relative L1 and Mean L1 norms of the difference Matrix
+  float mean_l1 = l1_diff / x->num_elems;
+  float relative_l1 = l1_diff / l1_norm_A;
+
+  // Computing Relative L2 norm - i.e., Euclidean distance
+  double norm_root_A = sqrt(l2_norm_A);
+  double diff_root = sqrt(l2_diff);
+  float mean_l2 = diff_root / x->num_elems;
+  float relative_l2 = diff_root / norm_root_A;
+
+  // Packing computed norms in Norm_t struct
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware
+  norms->mean_l1 = mean_l1;
+  norms->mean_l2 = mean_l2;
+  norms->orig_inf_norm = 0.0;
+
+  // Relative metrics (relative to distribution) - suitable for PROMISE
+  norms->l1_norm = relative_l1;
+  norms->l2_norm = relative_l2;
+  norms->inf_norm = 0.0;
+
+  INFO("l1_norm = %f \n", relative_l1);
+  INFO("l2_norm = %f \n", relative_l2);
+
+  return norms;
+}
+
+
+
+
+// In place: A[k] = A[k] * mul_factor for every index k < n handled by a thread.
+__global__ void vecConstMul(float* A, float mul_factor, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = A[tid] * mul_factor;
+}
+
+
+// In place: A[k] = roundf(A[k]) for every index k < n handled by a thread.
+__global__ void vecRound(float* A, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = roundf(A[tid]);
+}
+
+
+// In place: A[k] = A[k] / div_factor for every index k < n handled by a thread.
+__global__ void vecConstDiv(float* A, float div_factor, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = A[tid] / div_factor;
+}
+
+
+
+// Element-wise product stored into B: B[k] = A[k] * B[k] for every k < n.
+__global__ void vecMul(float* A, float* B, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  B[tid] = A[tid] * B[tid];
+}
+
+
+/****  ERROR injection routines  ******/
+
+// Fills the device buffer of `bias` (bias->num_elems floats) with zero-mean
+// Gaussian noise generated by cuRAND; `error_scale` (0..19) selects the
+// standard deviation from the table below.
+void initRandValues(Tensor* bias, int error_scale){
+
+  // Standard deviation per error knob
+  // FIXIT: Error knob 0 should be 0 zero
+  static const float scaling_values[] = {
+    0.016,  // knob 0
+    0.018,  // knob 1
+    0.022,  // knob 2
+    0.026,  // knob 3
+    0.030,  // knob 4
+    0.035,  // knob 5
+    0.04,   // knob 6
+    0.06,   // knob 7
+    0.08,   // knob 8
+    0.1,    // knob 9
+    0.25,   // knob 10
+    0.3,    // knob 11
+    0.35,   // knob 12
+    0.4,    // knob 13
+    0.45,   // knob 14
+    // Values below are currently unused by Opentuner
+    0.5,    // knob 15
+    0.55,   // knob 16
+    0.6,    // knob 17
+    0.65,   // knob 18
+    0.7     // knob 19
+  };
+  const int num_scales = sizeof(scaling_values) / sizeof(scaling_values[0]);
+
+  // Reject knobs outside the table instead of reading past its end
+  if(error_scale < 0 || error_scale >= num_scales){
+    ERROR("Error Scale out of bounds \n");
+    return;
+  }
+
+  struct timespec ts;
+  if(timespec_get(&ts, TIME_UTC) == 0){
+    printf("crashed \n");
+    abort();
+  }
+
+  curandGenerator_t gen;
+  curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
+  // Time-derived seed so that separate calls draw different noise
+  curandSetPseudoRandomGeneratorSeed(gen, ts.tv_nsec^ts.tv_sec);
+
+  // NOTE(review): cuRAND documents that normal generation with a pseudo-random
+  // generator needs an even element count; the returned status is not checked,
+  // so an odd-sized tensor presumably stays unfilled - confirm and handle
+  curandGenerateNormal(gen, (float*) bias->gpu_data, bias->num_elems, 0.0, 1.0 * scaling_values[error_scale]);
+
+  // Release the generator; without this every call leaks one
+  curandDestroyGenerator(gen);
+}
+
+
+
+// Flips bit 5 of randomly chosen bytes of the tensor behind `x_ptr`, touching
+// about 0.01 * freq_factor of its bytes, where freq_factor is picked by
+// `error_scale` (0..5). Returns (as void*) a heap-allocated Norm_t* comparing the
+// corrupted tensor with its original contents, or NULL on a bad `error_scale`.
+void* addBitError(void* x_ptr, int error_scale){
+
+  static const float freq_factors[] = {0.1, 0.2, 0.4, 0.6, 0.8, 1.0};
+  const int num_factors = sizeof(freq_factors) / sizeof(freq_factors[0]);
+
+  // The table has 6 entries, so knob 6 is already out of range. ERROR is not
+  // guaranteed to abort, hence the explicit bail-out
+  if(error_scale >= num_factors || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+    return NULL;
+  }
+
+  INFO("*** TensorBitError \n");
+  profileEvent("tensorBitError");
+
+  Tensor* x = (Tensor*) x_ptr;
+
+  size_t* dim_sizes = x->dims.dim_sizes;
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+                                                dim_sizes[0], dim_sizes[1],
+                                                dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms
+  tensorCopy(x, x_original);
+
+  float error_freq = freq_factors[error_scale];
+
+  deviceToHostCopy(x);
+
+  unsigned char* data_arr = reinterpret_cast<unsigned char*>(x->host_data);
+
+  long int total_bytes = x->size_in_bytes;
+  long int error_iterations = total_bytes * 0.01 * error_freq;
+  INFO("total_bytes = %ld, error_iterations = %ld \n", total_bytes, error_iterations);
+
+  srand(time(NULL));
+
+  const unsigned char flip_mask = 1u << 5; // XOR with this flips bit 5
+  for(long int i = 0; i < error_iterations; i++){
+    // FIXIT: The rand() is only specific to int - need long
+    long int index = rand() % total_bytes;
+    // Flip in place at `index`; writing to data_arr[i] clobbered leading bytes
+    data_arr[index] ^= flip_mask;
+  }
+
+  // Push the corrupted host buffer to the device before the norms are taken
+  // (calculateNorms2 starts with a device-to-host copy), as addRoundError does
+  hostToDeviceCopy(x);
+
+  Norm_t* norms = calculateNorms2(x, x_original);
+
+  profileEvent("tensorBitError_end", true);
+
+  return (void*) norms;
+}
+
+
+// Replaces each of the first `num_elems` values of `x` by its floor or its
+// ceil; the choice per element is the parity of a truncated N(0,1) sample.
+void randomCeilAndFloor(float* x, size_t num_elems){
+  INFO("randomCeilAndFloor\n");
+
+  std::random_device seed_source;
+  std::mt19937 engine(seed_source());
+  std::normal_distribution<float> gauss(0.0, 1.0);
+
+  size_t k = 0;
+  while(k < num_elems){
+    float sample = gauss(engine);
+    int parity = abs(((int) sample) % 2);
+    // parity is 0 or 1 by construction: even -> floor, odd -> ceil
+    x[k] = (parity == 0) ? floor(x[k]) : ceil(x[k]);
+    k++;
+  }
+}
+
+// Routine for Adding RoundOff Errors
+void* addRoundError(void* x_ptr, int error_scale){
+
+  if(error_scale > 11 || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+  }
+      
+  INFO("*** TensorRoundError \n");  
+  profileEvent("tensorRoundError");
+
+  Tensor* x = (Tensor*) x_ptr;
+  
+  size_t* dim_sizes = x->dims.dim_sizes; 
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+					        dim_sizes[0], dim_sizes[1],
+						dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms 
+  tensorCopy(x, x_original);
+
+  float round_factors[12];
+  round_factors[0] = 1000000; // FIXIT: This should be zero error
+  round_factors[1] = 100;
+  round_factors[2] = 10;
+  round_factors[3] = 7; // Beyond this point, the error function is linear
+  round_factors[4] = 3;
+  round_factors[5] = 1;
+  round_factors[6] = 0.7;
+  round_factors[7] = 0.3;
+  round_factors[8] = 0.1;
+  round_factors[9] = 0.07;
+  round_factors[10] = 0.03;
+  round_factors[11] = 0.01;
+  
+  // THINK: Considering using error magnitudes in this scenario
+  
+
+  float round_factor = round_factors[error_scale];
+  INFO("round_factor = %f \n", round_factor);
+  
+  hostToDeviceCopy(x);
+
+  int blockSize = 128;
+  int gridSize = (int) ceil ((float) x->num_elems / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // NOTE: Check if a large gridSize will work with really large tensors
+  vecConstMul<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems);
+  //vecRound<<<gridSize, blockSize>>>((float*) x->gpu_data, x->num_elems);
+  
+  deviceToHostCopy(x);
+  randomCeilAndFloor((float*) x->host_data, x->num_elems);
+  hostToDeviceCopy(x);
+  
+  vecConstDiv<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems);
+  
+  Norm_t* norms = calculateNorms2(x, x_original);
+  
+  profileEvent("tensorRoundError_end", true);
+  
+  return (void*) norms;
+}
+
+
+
+
+// Routine for Adding Gaussian Error
+void* addGaussianError(void* x_ptr, int error_scale){
+
+  if(error_scale > 11 || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+  }
+      
+  INFO("*** TensorAddError \n");  
+  profileEvent("tensorAddError");
+
+  Tensor* x = (Tensor*) x_ptr;
+  
+  size_t* dim_sizes = x->dims.dim_sizes;
+  Tensor* bias = (Tensor*) create4DTensor(x->data_type, x->data_format,
+					  dim_sizes[0], dim_sizes[1],
+					  dim_sizes[2], dim_sizes[3]);
+  
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+					        dim_sizes[0], dim_sizes[1],
+						dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms 
+  tensorCopy(x, x_original);
+
+  // NOTE: Error scale is used to generate the bias matrix
+  initRandValues(bias, error_scale);  
+
+  hostToDeviceCopy(x);
+  //hostToDeviceCopy(bias);
+
+ 
+  int blockSize = 1024;
+  int gridSize = (int) ceil ((float) x->num_elems / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // NOTE: Check if a large gridSize will work with really large tensors
+  vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems);
+  
+  float alpha = 1.0f, beta = 0.0f;
+    
+  // FIXIT: routine fails for 3D tensors
+  checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
+			    bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));
+
+
+  //Norm_t* norms = calculateNorms2(x, x_original);
+  Norm_t* norms = calculateNormsGPU(x, x_original);
+  
+  
+  profileEvent("tensorAddError_end", true);
+  
+  return (void*) norms;
+}
+
+
+
+void* tensorAddError(void* x_ptr, int error_scale){
+
+  void * new_x = addGaussianError(x_ptr, error_scale);
+  //void * new_x = addRoundError(x_ptr, error_scale);
+  //void * new_x = addBitError(x_ptr, error_scale);
+  return new_x;
+}
+
+
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h
new file mode 120000
index 0000000000000000000000000000000000000000..a9c72af5b6737a57be1db44cd3231c6dda0857f0
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/.#error.h
@@ -0,0 +1 @@
+hsharif3@tyler.cs.illinois.edu.21294:1541049775
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d38c6eea4451328ca040db67250dcaeae0df94d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h
@@ -0,0 +1,107 @@
+
+
+#ifndef RUNTIME_DEBUG
+#define RUNTIME_DEBUG
+
+#define LOG_DEBUG 0   // Sets the debug logging to true
+#define LOG_INFO 0  // Sets the info logging to true
+
+#include "tensor.h"
+
+
+// Prints `s` and the call site's file:line to stderr, resets the CUDA device and exits with status 1.
+#define FatalError(s) do {                                             \
+    std::stringstream _message;                                        \
+    _message << std::string(s) + "\n" << __FILE__ << ':' << __LINE__;  \
+    std::cerr << _message.str() << "\nAborting...\n";                  \
+    cudaDeviceReset();                                                 \
+    exit(1);                                                           \
+} while(0)
+
+
+// Calls FatalError unless `status` is CUDNN_STATUS_SUCCESS. `status` is evaluated exactly once.
+#define checkCUDNN(status) do {                                        \
+    cudnnStatus_t _status = (status);                                  \
+    if (_status != CUDNN_STATUS_SUCCESS) {                             \
+      FatalError(std::string("CUDNN failure: ") + cudnnGetErrorString(_status)); \
+    }                                                                  \
+} while(0)
+
+
+// Calls FatalError when `status` is non-zero. `status` is evaluated exactly once.
+#define checkCudaErrors(status) do {                                   \
+    auto _status = (status);                                           \
+    if (_status != 0) {                                                \
+      std::stringstream _error;                                        \
+      _error << "Cuda failure: " << _status;                           \
+      FatalError(_error.str());                                        \
+    }                                                                  \
+} while(0)
+
+// printf-style logger: prints "INFO: " then the formatted message, only when LOG_INFO is non-zero.
+void INFO(char* format, ...){
+  if(!LOG_INFO) return; // info logging disabled
+  va_list ap;
+  va_start(ap, format);
+  printf("INFO: ");
+  vprintf(format, ap);
+  va_end(ap);
+}
+
+// printf-style logger: prints "DEBUG: " then the formatted message, only when LOG_DEBUG is non-zero.
+void DEBUG(char* format, ...){
+  if(!LOG_DEBUG) return; // debug logging disabled
+  va_list ap;
+  va_start(ap, format);
+  printf("DEBUG: ");
+  vprintf(format, ap);
+  va_end(ap);
+}
+
+void ERROR(char* format, ...){
+  if(!LOG_DEBUG) // Don't print if logging info is disabled
+    return;
+  va_list args;
+  va_start(args, format);
+  printf("ERROR!: ");
+  vprintf(format, args);
+  va_end(args);
+
+  abort();
+}
+
+
+// Sets every host-side element to 1.0; tensors that are not CUDNN_DATA_FLOAT are left untouched.
+void fillOnes(struct Tensor* tensor){
+  if(tensor->data_type != CUDNN_DATA_FLOAT)
+    return;
+  float* values = (float*) tensor->host_data;
+  for(unsigned int k = 0; k < tensor->num_elems; k++){
+    values[k] = 1.0;
+  }
+}
+
+
+// Logs (via DEBUG) the data type, the four dimension sizes and the four
+// strides that cuDNN reports for the tensor's 4D descriptor.
+void printTensorDescInfo(struct Tensor* tensor){
+  cudnnDataType_t dType;
+  int size1, size2, size3, size4;
+  int nStride, cStride, hStride, wStride;
+
+  cudnnGetTensor4dDescriptor(tensor->tensor_desc, &dType,
+                             &size1, &size2, &size3, &size4,
+                             &nStride, &cStride, &hStride, &wStride);
+
+  DEBUG("dType = %d, size1 = %d, size2 = %d, size3 = %d, size4 = %d \n",
+        dType, size1, size2, size3, size4);
+
+  DEBUG("nStride = %d, cStride = %d, hStride = %d, wStride = %d \n",
+        nStride, cStride, hStride, wStride);
+}
+
+
+
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0b0a87eab74a39566cb6864dac5b85e705034db
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
@@ -0,0 +1,630 @@
+
+#ifndef ERROR_HEADER
+#define ERROR_HEADER
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+#include <ctime>
+#include <cfloat>
+#include <algorithm>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <random>
+#include <string>
+#include <time.h>
+
+#include <curand.h>
+#include <curand_kernel.h>
+#include <math.h>
+#include <assert.h>
+
+
+#include "../include/debug.h"
+#include "tensor.h"
+#include "profiling.h"
+#include "tensor_utils.cu"
+#include "global_data.h"
+
+
+
+
+// Loads the per-operation accuracy knobs from `file_name` (whitespace-separated
+// integers) into the global `op_accuracies`, sets `total_ops` to the number of
+// knobs read and resets `op_counter`. A missing file leaves the list empty.
+void readOpenTunerFlags(char* file_name){
+  // Reset the global tuner state before (re)loading
+  total_ops = 0;
+  op_counter = 0;
+  op_accuracies.clear();
+
+  FILE* fp = fopen(file_name, "r");
+  if(fp == NULL){
+    ERROR("File 'opentuner_flags' not found \n");
+    // ERROR is not guaranteed to abort, so bail out explicitly: looping on a
+    // NULL stream never sees EOF, and fclose(NULL) is undefined behavior
+    return;
+  }
+
+  // Record only values that were really parsed. Testing for exactly one
+  // conversion stops on EOF and on non-numeric input (where fscanf returns 0
+  // forever) and avoids pushing an unparsed, stale value at end of file
+  int op_acc;
+  while(fscanf(fp, "%d", &op_acc) == 1){
+    op_accuracies.push_back(op_acc);
+    total_ops++;
+  }
+
+  fclose(fp);
+}
+
+
+
+
+
+/*__device__ inline void atomicAdd(float* address, float value)
+
+{
+
+  float old = value;
+  float new_old;
+
+  do{
+    new_old = atomicExch(address, 0.0f);
+    new_old += old;
+  }
+
+  while ((old = atomicExch(address, new_old))!=0.0f);
+
+};
+*/
+
+
+
+
+
+Norm_t* calculateNorms(Tensor* x, Tensor* x_orig){
+
+  deviceToHostCopy(x);
+  deviceToHostCopy(x_orig);
+
+  // NOTE: Move floats to doubles - overflow is quite possible
+  float l1_norm = 0.0;
+  float l2_norm = 0.0;
+  float inf_norm = -1.0;
+  double total = 0.0;
+
+  float* arr1 = (float*) x->host_data;
+  float* arr2 = (float*) x_orig->host_data;
+  
+  for(unsigned int i = 0; i < x->num_elems; i++){
+
+    total = total + arr2[i];
+    
+    float diff = abs(arr1[i] - arr2[i]);
+    l1_norm += diff;
+    l2_norm += (arr1[i] - arr2[i]) *  (arr1[i] - arr2[i]);
+
+    if(inf_norm < diff)
+      inf_norm = diff;
+  }
+
+  l1_norm = l1_norm / (x->num_elems * 1.0);
+  l2_norm = l2_norm / (x->num_elems * 1.0);
+
+  double distribution_mean = total / (x->num_elems * 1.0);
+  l1_norm = l1_norm / distribution_mean;
+  l2_norm = l2_norm / distribution_mean;
+
+    
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  norms->l1_norm = l1_norm;
+  norms->l2_norm = l2_norm;
+  norms->inf_norm = inf_norm;  
+  
+  INFO("l1_norm = %f \n", l1_norm);
+  INFO("l2_norm = %f \n", l2_norm);
+  INFO("inf_norm = %f \n", inf_norm);
+
+  return norms;
+}
+
+
+
+Norm_t* calculateNorms2(Tensor* x, Tensor* x_orig){
+
+  deviceToHostCopy(x);
+  deviceToHostCopy(x_orig);
+
+  // NOTE: Move all floats to doubles - overflow is quite possible
+  double l0_norm_A = 0.0;
+  double l0_norm_B = 0.0;
+
+  double l1_norm_A = 0.0;
+  double l1_norm_B = 0.0;
+  
+  double l2_norm_A = 0.0;
+  double l2_norm_B = 0.0;
+  float inf_norm = -1.0;
+  float orig_inf_norm = -1.0;
+  double total_diff = 0.0;
+  double total_diff_squared = 0.0;
+ 
+  float* arr1 = (float*) x->host_data;
+  float* arr2 = (float*) x_orig->host_data;
+  
+  for(unsigned int i = 0; i < x->num_elems; i++){
+
+    if(arr2[i] != 0.0)
+      l0_norm_A = l0_norm_A + 1.0;
+    if(arr1[i] != 0.0)
+      l0_norm_B = l0_norm_B + 1.0;
+        
+    l1_norm_A = l1_norm_A + abs(arr2[i]);
+    l1_norm_B = l1_norm_B + abs(arr1[i]);
+
+    l2_norm_A = l2_norm_A + (arr2[i] * arr2[i]);
+    l2_norm_B = l2_norm_B + (arr1[i] * arr1[i]);
+      
+    float diff = abs(arr1[i] - arr2[i]);
+    total_diff = total_diff + diff;
+    float diff_squared = diff * diff;
+    total_diff_squared = total_diff_squared + diff_squared; 
+
+
+    if(orig_inf_norm < diff){
+      orig_inf_norm = diff;
+    }
+    
+    // Relative difference value
+    float normalized_diff = diff / arr2[i];   
+    if(inf_norm < normalized_diff){
+      inf_norm = normalized_diff;
+    }    
+  }
+
+  // Relative L1 and Mean L1 norms of the difference Matrix
+  float mean_l1 = ( total_diff ) / x->num_elems;
+  float relative_l1 = ( total_diff ) / l1_norm_A;
+
+  // Computing Relative L2 norm - i.e., Euclidean distance
+  double norm_root_A = sqrt(l2_norm_A);
+  double diff_root = sqrt(total_diff_squared);
+  float mean_l2 = diff_root / x->num_elems;
+  float relative_l2 = diff_root / norm_root_A;
+
+  // Packing computed norms in Norm_t struct
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware
+  norms->mean_l1 = mean_l1;
+  norms->mean_l2 = mean_l2;
+  norms->orig_inf_norm = orig_inf_norm;
+
+  // Relative metrics (relative to distribution) - suitable for PROMISE
+  norms->l1_norm = relative_l1;
+  norms->l2_norm = relative_l2;
+  norms->inf_norm = inf_norm;  
+  
+  INFO("l1_norm = %f \n", relative_l1);
+  INFO("l2_norm = %f \n", relative_l2);
+  INFO("inf_norm = %f \n", inf_norm);
+
+  return norms;
+}
+
+
+
+
+
+// One thread per element: atomically accumulates |A[i]|, A[i]^2, |A[i]-B[i]|
+// and (A[i]-B[i])^2 into the four double accumulators. Threads whose global
+// index is >= n do nothing.
+__global__ void normComputeKernel(float* A, float * B, double* l1_A, double* l2_A,
+                                  double* l1_diff, double* l2_diff, unsigned int n){
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if(!(idx < n))
+    return;
+
+  float a = A[idx];
+  double abs_delta = fabsf(a - B[idx]);
+
+  atomicAdd( l1_A,  fabsf(a) );
+  atomicAdd( l2_A, (a * a) );
+  atomicAdd( l1_diff, abs_delta);
+  atomicAdd( l2_diff, abs_delta * abs_delta);
+}
+
+
+
+// Compute Norms on the GPU.
+// Same metrics as calculateNorms2 (relative to x_orig), except that the two
+// inf-norm fields are not computed and are reported as 0. Returns a
+// heap-allocated Norm_t owned by the caller.
+Norm_t* calculateNormsGPU(Tensor* x, Tensor* x_orig){
+
+  hostToDeviceCopy(x);
+  hostToDeviceCopy(x_orig);
+
+  // FIXIT: Move all floats to doubles - overflow is possible
+  // Accumulator slots: [0] sum|ref|, [1] sum ref^2, [2] sum|diff|, [3] sum diff^2
+  const int num_sums = 4;
+  double sums[num_sums] = {0.0, 0.0, 0.0, 0.0};
+  double* sums_d = NULL;
+
+  cudaMalloc( (void**) &sums_d, num_sums * sizeof(double));
+  // cudaMalloc does not clear memory and the kernel only ever adds into these
+  // slots, so they must start at zero (all-zero bytes are 0.0 for a double)
+  cudaMemset(sums_d, 0, num_sums * sizeof(double));
+
+  float* arr1 = (float*) x->gpu_data;
+  float* arr2 = (float*) x_orig->gpu_data;
+
+  int blockSize = 1024;
+  int gridSize = (int) ceil ((float) x->num_elems / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // The kernel's first array supplies the normalizing norms; pass the reference
+  // tensor there so the relative metrics agree with calculateNorms2
+  normComputeKernel<<<gridSize, blockSize>>>(arr2, arr1, sums_d, sums_d + 1,
+                                             sums_d + 2, sums_d + 3, x->num_elems);
+
+  // Blocking copy: also waits for the kernel above to finish
+  cudaMemcpy(sums, sums_d, num_sums * sizeof(double), cudaMemcpyDeviceToHost);
+  cudaFree(sums_d);  // the device accumulators were leaked on every call
+
+  double l1_norm_A = sums[0];
+  double l2_norm_A = sums[1];
+  double l1_diff = sums[2];
+  double l2_diff = sums[3];
+
+  // Relative L1 and Mean L1 norms of the difference Matrix
+  float mean_l1 = l1_diff / x->num_elems;
+  float relative_l1 = l1_diff / l1_norm_A;
+
+  // Computing Relative L2 norm - i.e., Euclidean distance
+  double norm_root_A = sqrt(l2_norm_A);
+  double diff_root = sqrt(l2_diff);
+  float mean_l2 = diff_root / x->num_elems;
+  float relative_l2 = diff_root / norm_root_A;
+
+  // Packing computed norms in Norm_t struct
+  Norm_t* norms = (Norm_t*) malloc(sizeof(Norm_t));
+  // Mean metrics - not normalized for the distribution - suitable for precision tuning hardware
+  norms->mean_l1 = mean_l1;
+  norms->mean_l2 = mean_l2;
+  norms->orig_inf_norm = 0.0;
+
+  // Relative metrics (relative to distribution) - suitable for PROMISE
+  norms->l1_norm = relative_l1;
+  norms->l2_norm = relative_l2;
+  norms->inf_norm = 0.0;
+
+  INFO("l1_norm = %f \n", relative_l1);
+  INFO("l2_norm = %f \n", relative_l2);
+
+  return norms;
+}
+
+
+
+
+// In place: A[k] = A[k] * mul_factor for every index k < n handled by a thread.
+__global__ void vecConstMul(float* A, float mul_factor, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = A[tid] * mul_factor;
+}
+
+
+// In place: A[k] = roundf(A[k]) for every index k < n handled by a thread.
+__global__ void vecRound(float* A, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = roundf(A[tid]);
+}
+
+
+// In place: A[k] = A[k] / div_factor for every index k < n handled by a thread.
+__global__ void vecConstDiv(float* A, float div_factor, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  A[tid] = A[tid] / div_factor;
+}
+
+
+
+// Element-wise product stored into B: B[k] = A[k] * B[k] for every k < n.
+__global__ void vecMul(float* A, float* B, int n){
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if(tid >= n)
+    return;
+  B[tid] = A[tid] * B[tid];
+}
+
+
+/****  ERROR injection routines  ******/
+
+// Fills the device buffer of `bias` (bias->num_elems floats) with zero-mean
+// Gaussian noise generated by cuRAND; `error_scale` (0..19) selects the
+// standard deviation from the table below.
+void initRandValues(Tensor* bias, int error_scale){
+
+  // Standard deviation per error knob
+  // FIXIT: Error knob 0 should be 0 zero
+  static const float scaling_values[] = {
+    0.016,  // knob 0
+    0.018,  // knob 1
+    0.022,  // knob 2
+    0.026,  // knob 3
+    0.030,  // knob 4
+    0.035,  // knob 5
+    0.04,   // knob 6
+    0.06,   // knob 7
+    0.08,   // knob 8
+    0.1,    // knob 9
+    0.25,   // knob 10
+    0.3,    // knob 11
+    0.35,   // knob 12
+    0.4,    // knob 13
+    0.45,   // knob 14
+    // Values below are currently unused by Opentuner
+    0.5,    // knob 15
+    0.55,   // knob 16
+    0.6,    // knob 17
+    0.65,   // knob 18
+    0.7     // knob 19
+  };
+  const int num_scales = sizeof(scaling_values) / sizeof(scaling_values[0]);
+
+  // Reject knobs outside the table instead of reading past its end
+  if(error_scale < 0 || error_scale >= num_scales){
+    ERROR("Error Scale out of bounds \n");
+    return;
+  }
+
+  struct timespec ts;
+
+  if(timespec_get(&ts, TIME_UTC) == 0){
+    printf("crashed \n");
+    abort();
+  }
+
+  curandGenerator_t gen;
+  curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
+
+  // Time-derived seed so that separate calls draw different noise
+  curandSetPseudoRandomGeneratorSeed(gen, ts.tv_nsec^ts.tv_sec);
+
+  // NOTE(review): cuRAND documents that normal generation with a pseudo-random
+  // generator needs an even element count; the returned status is not checked,
+  // so an odd-sized tensor presumably stays unfilled - confirm and handle
+  curandGenerateNormal(gen, (float*) bias->gpu_data, bias->num_elems, 0.0, 1.0 * scaling_values[error_scale]);
+
+  // Release the generator; without this every call leaks one
+  curandDestroyGenerator(gen);
+}
+
+
+
+// Flips bit 5 of randomly chosen bytes of the tensor behind `x_ptr`, touching
+// about 0.01 * freq_factor of its bytes, where freq_factor is picked by
+// `error_scale` (0..5). Returns (as void*) a heap-allocated Norm_t* comparing the
+// corrupted tensor with its original contents, or NULL on a bad `error_scale`.
+void* addBitError(void* x_ptr, int error_scale){
+
+  static const float freq_factors[] = {0.1, 0.2, 0.4, 0.6, 0.8, 1.0};
+  const int num_factors = sizeof(freq_factors) / sizeof(freq_factors[0]);
+
+  // The table has 6 entries, so knob 6 is already out of range. ERROR is not
+  // guaranteed to abort, hence the explicit bail-out
+  if(error_scale >= num_factors || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+    return NULL;
+  }
+
+  INFO("*** TensorBitError \n");
+  profileEvent("tensorBitError");
+
+  Tensor* x = (Tensor*) x_ptr;
+
+  size_t* dim_sizes = x->dims.dim_sizes;
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+                                                dim_sizes[0], dim_sizes[1],
+                                                dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms
+  tensorCopy(x, x_original);
+
+  float error_freq = freq_factors[error_scale];
+
+  deviceToHostCopy(x);
+
+  unsigned char* data_arr = reinterpret_cast<unsigned char*>(x->host_data);
+
+  long int total_bytes = x->size_in_bytes;
+  long int error_iterations = total_bytes * 0.01 * error_freq;
+  INFO("total_bytes = %ld, error_iterations = %ld \n", total_bytes, error_iterations);
+
+  srand(time(NULL));
+
+  const unsigned char flip_mask = 1u << 5; // XOR with this flips bit 5
+  for(long int i = 0; i < error_iterations; i++){
+    // FIXIT: The rand() is only specific to int - need long
+    long int index = rand() % total_bytes;
+    // Flip in place at `index`; writing to data_arr[i] clobbered leading bytes
+    data_arr[index] ^= flip_mask;
+  }
+
+  // Push the corrupted host buffer to the device before the norms are taken
+  // (calculateNorms2 starts with a device-to-host copy), as addRoundError does
+  hostToDeviceCopy(x);
+
+  Norm_t* norms = calculateNorms2(x, x_original);
+
+  profileEvent("tensorBitError_end", true);
+
+  return (void*) norms;
+
+}
+
+
+// Rounds each of the num_elems floats in x (in place) either down or up.
+// The direction is derived from a N(0,1) sample truncated to int: an even
+// truncated value selects floor(), an odd one selects ceil().
+// NOTE(review): most N(0,1) samples truncate to 0, so floor() is picked more
+// often than ceil() - confirm that this bias is intended.
+void randomCeilAndFloor(float* x, size_t num_elems){
+
+  INFO("randomCeilAndFloor\n");
+
+  std::random_device seed_source;
+  std::mt19937 engine(seed_source());
+  std::normal_distribution<float> gaussian(0.0, 1.0);
+
+  for(size_t idx = 0; idx < num_elems; ++idx){
+    const int truncated = (int) gaussian(engine);
+    const bool round_up = (abs(truncated % 2) == 1);
+    if(round_up)
+      x[idx] = ceil(x[idx]);
+    else
+      x[idx] = floor(x[idx]);
+  }
+
+}
+
+// Routine for Adding RoundOff Errors
+//
+// error_scale (valid range 0..11) selects a scaling factor from the table
+// below. The tensor is multiplied by that factor on the GPU, every element is
+// randomly floored or ceiled on the host (randomCeilAndFloor), and the factor
+// is divided back out on the GPU. A pristine copy of x is taken first.
+//
+// Returns the Norm_t* produced by calculateNorms2(x, x_original) as a void*.
+void* addRoundError(void* x_ptr, int error_scale){
+
+  if(error_scale > 11 || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+  }
+
+  INFO("*** TensorRoundError \n");
+  profileEvent("tensorRoundError");
+
+  Tensor* x = (Tensor*) x_ptr;
+
+  size_t* dim_sizes = x->dims.dim_sizes;
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+                                                dim_sizes[0], dim_sizes[1],
+                                                dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms
+  tensorCopy(x, x_original);
+
+  static const float round_factors[12] = {
+    1000000, // FIXIT: This should be zero error
+    100,
+    10,
+    7,       // Beyond this point, the error function is linear
+    3,
+    1,
+    0.7,
+    0.3,
+    0.1,
+    0.07,
+    0.03,
+    0.01
+  };
+
+  // THINK: Considering using error magnitudes in this scenario
+
+  float round_factor = round_factors[error_scale];
+  INFO("round_factor = %f \n", round_factor);
+
+  hostToDeviceCopy(x);
+
+  int blockSize = 128;
+  // Integer ceil-division: going through float loses precision once num_elems
+  // exceeds 2^24 and could launch one block too few.
+  int gridSize = (int) ((x->num_elems + blockSize - 1) / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // NOTE: Check if a large gridSize will work with really large tensors
+  vecConstMul<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems);
+  //vecRound<<<gridSize, blockSize>>>((float*) x->gpu_data, x->num_elems);
+
+  // Rounding is done on the host, so round-trip the data through host memory
+  deviceToHostCopy(x);
+  randomCeilAndFloor((float*) x->host_data, x->num_elems);
+  hostToDeviceCopy(x);
+
+  vecConstDiv<<<gridSize, blockSize>>>((float*) x->gpu_data, round_factor, x->num_elems);
+
+  Norm_t* norms = calculateNorms2(x, x_original);
+
+  profileEvent("tensorRoundError_end", true);
+
+  return (void*) norms;
+}
+
+
+
+
+// Routine for Adding Gaussian Error
+//
+// error_scale (checked against 0..11) is forwarded to initRandValues to fill
+// a bias tensor with random values. The bias is multiplied element-wise by x
+// (vecMul) and then accumulated into x with cudnnAddTensor. A pristine copy
+// of x is taken first.
+//
+// Returns the Norm_t* produced by calculateNormsGPU(x, x_original) as a void*.
+void* addGaussianError(void* x_ptr, int error_scale){
+
+  if(error_scale > 11 || error_scale < 0){
+    ERROR("Error Scale out of bounds \n");
+  }
+
+  INFO("*** TensorAddError \n");
+  profileEvent("tensorAddError");
+
+  Tensor* x = (Tensor*) x_ptr;
+
+  size_t* dim_sizes = x->dims.dim_sizes;
+  Tensor* bias = (Tensor*) create4DTensor(x->data_type, x->data_format,
+                                          dim_sizes[0], dim_sizes[1],
+                                          dim_sizes[2], dim_sizes[3]);
+
+  Tensor* x_original = (Tensor*) create4DTensor(x->data_type, x->data_format,
+                                                dim_sizes[0], dim_sizes[1],
+                                                dim_sizes[2], dim_sizes[3]);
+
+  // Copying x data into x_original - for computing Norms
+  tensorCopy(x, x_original);
+
+  // NOTE: Error scale is used to generate the bias matrix
+  initRandValues(bias, error_scale);
+
+  hostToDeviceCopy(x);
+  //hostToDeviceCopy(bias);
+
+
+  int blockSize = 1024;
+  // Integer ceil-division: going through float loses precision once num_elems
+  // exceeds 2^24 and could launch one block too few.
+  int gridSize = (int) ((x->num_elems + blockSize - 1) / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  // NOTE: Check if a large gridSize will work with really large tensors
+  vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems);
+
+  // The same scaling factor (1.0) is applied to both the bias and x
+  float alpha = 1.0f;
+
+  // FIXIT: routine fails for 3D tensors
+  checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
+                            bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));
+
+
+  //Norm_t* norms = calculateNorms2(x, x_original);
+  Norm_t* norms = calculateNormsGPU(x, x_original);
+
+
+  profileEvent("tensorAddError_end", true);
+
+  return (void*) norms;
+}
+
+
+
+// Entry point for error injection: forwards x_ptr and error_scale to
+// addGaussianError and returns its result unchanged.
+void* tensorAddError(void* x_ptr, int error_scale){
+
+  // Alternative injectors defined in this file:
+  //   addRoundError(x_ptr, error_scale)
+  //   addBitError(x_ptr, error_scale)
+  return addGaussianError(x_ptr, error_scale);
+}
+
+
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h
new file mode 100644
index 0000000000000000000000000000000000000000..252427c65379aa977237652eb4435e685dbc3403
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_conversion.h
@@ -0,0 +1,114 @@
+// Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This code modified from the public domain code here: 
+// https://gist.github.com/rygorous/2156668
+// The URL above includes more robust conversion routines
+// that handle Inf and NaN correctly. 
+// 
+// It is recommended to use the more robust versions in production code.
+
+// Shorthand used by the bit-level views below.
+typedef unsigned uint;
+
+// Bit-level view of a 32-bit float: the same storage can be accessed as the
+// raw integer pattern (u), as the float value (f), or as separate
+// mantissa / exponent / sign bit-fields.
+// NOTE(review): the bit-field order assumes the compiler allocates bit-fields
+// starting from the least significant bit - this is implementation-defined.
+union FP32
+{
+    uint u;
+    float f;
+    struct
+    {
+        uint Mantissa : 23;
+        uint Exponent : 8;
+        uint Sign : 1;
+    };
+};
+
+// Bit-level view of a 16-bit half: raw pattern (u) or separate
+// mantissa / exponent / sign bit-fields.
+// NOTE(review): the bit-fields are declared with a 32-bit base type (uint),
+// so on common ABIs this union is presumably 4 bytes wide rather than 2 -
+// keep that in mind when casting to or from a 2-byte half.
+union FP16
+{
+    unsigned short u;
+    struct
+    {
+        uint Mantissa : 10;
+        uint Exponent : 5;
+        uint Sign : 1;
+    };
+};
+
+// Approximate float -> half conversion. This is faster than an exact
+// conversion but converts some sNaNs to infinity and doesn't round correctly
+// (the low mantissa bits are simply truncated). Handle with care.
+//
+// fl: value to convert. Finite magnitudes above the half range become
+//     infinity; Inf stays Inf and NaN stays NaN (except the sNaN case above).
+// Returns the half whose bit pattern is the converted value.
+static half approx_float_to_half(float fl)
+{
+    FP32 f32infty = { 255 << 23 };
+    FP32 f16max = { (127 + 16) << 23 };
+    FP32 magic = { 15 << 23 };
+    FP32 expinf = { (255 ^ 31) << 23 };
+    uint sign_mask = 0x80000000u;
+    FP16 o = { 0 };
+
+    FP32 f;
+    f.f = fl;
+
+    // Strip the sign; it is re-attached to the result at the end.
+    uint sign = f.u & sign_mask;
+    f.u ^= sign;
+
+    // Compare as floats: anything that is not below +Inf is Inf or NaN.
+    if (!(f.f < f32infty.f)) // Inf or NaN
+        // Turn the all-ones 8-bit exponent into what becomes an all-ones
+        // 5-bit exponent after the shift below. This must modify f itself,
+        // because o is derived from f.u afterwards.
+        f.u ^= expinf.u;
+    else
+    {
+        if (f.f > f16max.f) f.f = f16max.f;
+        f.f *= magic.f; // re-bias the exponent for the half format
+    }
+
+    o.u = f.u >> 13; // Take the mantissa bits
+    o.u |= sign >> 16;
+    return *((half*)&o);
+}
+
+// Half -> float conversion (from the half->float code - just for
+// verification). Handles zero, denormals, normals, Inf and NaN.
+//
+// hf: half value to convert.
+// Returns the float with the same value.
+static float half_to_float(half hf)
+{
+    // Read only the 16 bits of the half. Copying a whole FP16 union out of
+    // &hf could read past the 2-byte object, since FP16's bit-fields use a
+    // 32-bit base type.
+    FP16 h;
+    h.u = *((const unsigned short*)&hf);
+
+    static const FP32 magic = { 113 << 23 };
+    static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift
+    FP32 o;
+
+    o.u = (h.u & 0x7fff) << 13;     // exponent/mantissa bits
+    uint exp = shifted_exp & o.u;   // just the exponent
+    o.u += (127 - 15) << 23;        // exponent adjust
+
+    // handle exponent special cases
+    if (exp == shifted_exp) // Inf/NaN?
+        o.u += (128 - 16) << 23;    // extra exp adjust
+    else if (exp == 0) // Zero/Denormal?
+    {
+        o.u += 1 << 23;             // extra exp adjust
+        o.f -= magic.f;             // renormalize
+    }
+
+    o.u |= (h.u & 0x8000) << 16;    // sign bit
+    return o.f;
+}
\ No newline at end of file
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h
new file mode 100644
index 0000000000000000000000000000000000000000..64aee8231b54d52710192fc7d598d6ed162f1338
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_emu.h
@@ -0,0 +1,274 @@
+/*
+ * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+// Conversion from/to 16-bit floating point (half-precision).
+
+#if !defined(_FP16_EMU_H_)
+#define _FP16_EMU_H_
+
+#include <driver_types.h>
+#include <cuda_fp16.h>
+
+// Necessary to ensure visibility of CUDART_VERSION macro
+#include <cuda_runtime_api.h>
+
+// Definition of '__half_raw' was not provided before CUDA 9.0.
+// '__half_raw' is our type where the unsigned 16-bit integer 
+// data member 'x' can be accessed in both CUDA 9.0 and 8.0.
+#if CUDART_VERSION < 9000 
+typedef __half __half_raw;
+#endif
+
+// Internally, in CUDNN we use half1 struct as the FP16 type.
+typedef __half half1;
+
+// Characteristic FP16 magnitudes as floating-point literals.
+// HLF_MIN and HLF_MAX match the values named in the hmin() and hmax()
+// comments below (smallest positive normalized and largest positive value).
+// NOTE(review): HLF_EPSILON is presumably the FP16 rounding epsilon - confirm.
+#define HLF_EPSILON 4.887581E-04
+#define HLF_MIN     6.103516E-05
+#define HLF_MAX     6.550400E+04
+
+// Host-side float -> FP16 conversion (round to nearest even); defined below.
+half1 cpu_float2half_rn(float f);
+
+// Host-side FP16 -> float conversion; defined below.
+float cpu_half2float(half1 h);
+
+// Absolute value: returns h with its sign bit (bit 15) cleared.
+static __inline__ __device__ __host__ half1 habs(half1 h)
+{
+    __half_raw bits = reinterpret_cast<__half_raw&>(h);
+    bits.x = bits.x & 0x7fffU;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// Negation: returns h with its sign bit (bit 15) toggled.
+static __inline__ __device__ __host__ half1 hneg(half1 h)
+{
+    __half_raw bits = reinterpret_cast<__half_raw&>(h);
+    bits.x = bits.x ^ 0x8000U;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// Returns 1 when h is NaN (exponent bits all ones, mantissa non-zero),
+// otherwise 0.
+static __inline__ __device__ __host__ int ishnan(half1 h)
+{
+    __half_raw bits = reinterpret_cast<__half_raw&>(h);
+    if ((bits.x & 0x7c00U) != 0x7c00U)
+        return 0;                      // exponent is not all ones
+    return (bits.x & 0x03ffU) != 0;    // NaN needs a non-zero mantissa
+}
+
+// Returns 1 when h is +/- infinity (exponent bits all ones, mantissa zero),
+// otherwise 0.
+static __inline__ __device__ __host__ int ishinf(half1 h)
+{
+    __half_raw bits = reinterpret_cast<__half_raw&>(h);
+    if ((bits.x & 0x7c00U) != 0x7c00U)
+        return 0;                      // exponent is not all ones
+    return (bits.x & 0x03ffU) == 0;    // infinity needs a zero mantissa
+}
+
+// Returns 1 when neither operand is NaN and both have the same bit pattern,
+// otherwise 0. The comparison is bitwise, so +0 and -0 compare as different.
+static __inline__ __device__ __host__ int ishequ(half1 x, half1 y)
+{
+    __half_raw lhs = reinterpret_cast<__half_raw&>(x);
+    __half_raw rhs = reinterpret_cast<__half_raw&>(y);
+    if (ishnan(x) != 0)
+        return 0;
+    if (ishnan(y) != 0)
+        return 0;
+    return lhs.x == rhs.x;
+}
+
+// FP16 constant 0.0 (bit pattern 0x0000).
+static __inline__ __device__ __host__ half1 hzero()
+{
+    __half_raw bits;
+    bits.x = 0x0000U;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// FP16 constant 1.0 (bit pattern 0x3c00).
+static __inline__ __device__ __host__ half1 hone()
+{
+    __half_raw bits;
+    bits.x = 0x3c00U;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// FP16 quiet NaN (bit pattern 0x7e00): exponent all ones with the most
+// significant fraction bit (#9) set.
+static __inline__ __device__ __host__ half1 hnan()
+{
+    __half_raw bits;
+    bits.x = 0x7e00U;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// Largest positive finite FP16 value, 6.5504e+04 (bit pattern 0x7bff):
+// exponent is all ones except its LSB (0x1e), mantissa is all ones (0x3ff).
+static __inline__ __device__ __host__ half1 hmax()
+{
+    __half_raw bits;
+    bits.x = 0x7bffU;
+    return reinterpret_cast<half1&>(bits);
+}
+
+// Smallest positive normalized FP16 value, 6.1035e-05 (bit pattern 0x0400):
+// exponent is 0x01 (5 bits), mantissa is all zeros (10 bits).
+static __inline__ __device__ __host__ half1 hmin()
+{
+    __half_raw bits;
+    bits.x = 0x0400U;
+    return reinterpret_cast<half1&>(bits);
+}
+
+
+
+
+
+
+
+
+
+
+
+// Compile-time assertion usable as a statement: when cond is false the
+// typedef declares an array of size -1, which fails to compile.
+#define STATIC_ASSERT(cond) do { typedef char compile_time_assert[(cond) ? 1 : -1]; } while (0)
+
+// Host functions for converting between FP32 and FP16 formats
+// Paulius Micikevicius (pauliusm@nvidia.com)
+
+// Converts a 32-bit float to FP16 on the host, rounding to nearest even.
+//
+// Special cases handled up front:
+//  - NaN (either sign) is returned as the bit pattern 0x7fff;
+//  - magnitudes too large to round to a finite half, and +/-Inf, are returned
+//    as infinity carrying the input's sign;
+//  - magnitudes too small to round up to the smallest half denormal are
+//    returned as zero carrying the input's sign.
+// Everything else is re-biased (or denormalized) and rounded below.
+half1 cpu_float2half_rn(float f)
+{
+    // x: raw bit pattern of f; u: the same pattern with the sign bit cleared
+    unsigned x = *((int*)(void*)(&f));
+    unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
+    unsigned sign, exponent, mantissa;
+
+    __half_raw hr;
+
+    // Get rid of +NaN/-NaN case first.
+    if (u > 0x7f800000) {
+        hr.x = 0x7fffU;
+        return reinterpret_cast<half1&>(hr);
+    }
+  
+    // Move the float sign bit (bit 31) to the half sign position (bit 15)
+    sign = ((x >> 16) & 0x8000);
+  
+    // Get rid of +Inf/-Inf, +0/-0.
+    if (u > 0x477fefff) {
+        hr.x = sign | 0x7c00U;
+        return reinterpret_cast<half1&>(hr);
+    }
+    if (u < 0x33000001) {
+        hr.x = sign | 0x0000U;
+        return reinterpret_cast<half1&>(hr);
+    }
+
+    exponent = ((u >> 23) & 0xff);
+    mantissa = (u & 0x7fffff);
+
+    if (exponent > 0x70) {
+        // Result is a normal half: drop 13 mantissa bits and re-bias the
+        // exponent from 127 to 15 (difference 0x70)
+        shift = 13;
+        exponent -= 0x70;
+    } else {
+        // Result is a half denormal: shift further right and make the
+        // implicit leading 1 of the float mantissa explicit
+        shift = 0x7e - exponent;
+        exponent = 0;
+        mantissa |= 0x800000;
+    }
+    // lsb: weight of the last kept bit; lsb_s1: half of it (the tie point);
+    // lsb_m1: mask of the bits being discarded
+    lsb = (1 << shift);
+    lsb_s1 = (lsb >> 1);
+    lsb_m1 = (lsb - 1);
+  
+    // Round to nearest even.
+    remainder = (mantissa & lsb_m1);
+    mantissa >>= shift;
+    if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
+        ++mantissa;
+        // Rounding overflowed the 10-bit mantissa: carry into the exponent
+        if (!(mantissa & 0x3ff)) {
+            ++exponent;
+            mantissa = 0;
+        }
+    }  
+
+    hr.x = (sign | (exponent << 10) | mantissa);  
+
+    return reinterpret_cast<half1&>(hr);
+}
+
+
+// Converts an FP16 value to a 32-bit float on the host.
+//
+// Inf maps to Inf with the same sign, NaN maps to a positive NaN with all
+// mantissa bits set, denormals are normalized, zero keeps its sign, and
+// normal values only have their exponent re-biased.
+float cpu_half2float(half1 h)
+{
+    // The result is assembled in an int and reinterpreted as a float
+    STATIC_ASSERT(sizeof(int) == sizeof(float));
+
+    __half_raw hr = reinterpret_cast<__half_raw&>(h);
+
+    // Split the half into its fields; the mantissa is pre-shifted into the
+    // top 10 bits of the 23-bit float mantissa
+    unsigned sign     = ((hr.x >> 15) & 1);
+    unsigned exponent = ((hr.x >> 10) & 0x1f);
+    unsigned mantissa = ((hr.x & 0x3ff) << 13);
+
+    if (exponent == 0x1f) {  /* NaN or Inf */
+        // NaN: clear the sign and set every mantissa bit; Inf: mantissa 0
+        mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
+        exponent = 0xff;
+    } else if (!exponent) {  /* Denorm or Zero */
+        if (mantissa) {
+            unsigned int msb;
+            exponent = 0x71;
+            // Shift left until the leading 1 has moved out of the mantissa
+            // field, lowering the exponent once per shift
+            do {
+                msb = (mantissa & 0x400000);
+                mantissa <<= 1;  /* normalize */
+                --exponent;
+            } while (!msb);
+            mantissa &= 0x7fffff;  /* 1.mantissa is implicit */
+        }
+    } else {
+        // Normal value: re-bias the exponent from 15 to 127
+        exponent += 0x70;
+    }
+
+    int temp = ((sign << 31) | (exponent << 23) | mantissa);
+
+    return reinterpret_cast<float&>(temp);
+}
+
+
+
+
+
+
+
+#endif  // _FP16_EMU_H_
+
+
+
+
+
+
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d2b4108ec1b49c6e4bd6b040e9cf2fb82143f129
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/fp16_gemm.cu
@@ -0,0 +1,272 @@
+#include <iostream>
+#include <string>
+#include <cublas_v2.h>
+#include <cuda_fp16.h>
+#include "fp16_emu.h"
+
+// Reports a failed CUDA runtime call on stderr and hands the status back
+// unchanged, so the call can be wrapped inline. Does not abort.
+inline cudaError_t checkCuda(cudaError_t result) {
+    const bool failed = (result != cudaSuccess);
+    if (failed) {
+        std::cerr << "CUDA Runtime Error: " << cudaGetErrorString(result) << "\n";
+    }
+    return result;
+}
+
+// Reports a failed cuBLAS call (numeric status code) on stderr and hands the
+// status back unchanged, so the call can be wrapped inline. Does not abort.
+inline cublasStatus_t checkCublas(cublasStatus_t result) {
+    const bool failed = (result != CUBLAS_STATUS_SUCCESS);
+    if (failed) {
+        std::cerr << "cuBLAS Error: " << result << "\n";
+    }
+    return result;
+}
+
+// Prints the first `elements` entries of `array` to stdout, one per line,
+// formatted with std::to_string.
+template <typename T>
+inline void printArray(const T * const __restrict__ array,
+                       const unsigned elements) {
+    unsigned pos = 0;
+    while (pos < elements) {
+        std::cout << std::to_string(array[pos]) << "\n";
+        ++pos;
+    }
+}
+
+// Initialization kernel: each thread whose global index is below `elements`
+// stores the constant 1.2 into its slot of `array`.
+template <typename T>
+__global__ void initKernel(T * const __restrict__ array,
+                           const unsigned elements) {
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= elements)
+        return;
+    array[tid] = 1.2;
+}
+
+// Host wrapper for initKernel: launches enough 512-thread blocks to cover
+// `elements` entries and waits for the device to finish.
+template <typename T>
+void init(T * const __restrict__ array,
+          const unsigned elements) {
+    const unsigned threads_per_block = 512;
+    const unsigned blocks = (elements + (threads_per_block - 1)) / threads_per_block;
+    initKernel<<<blocks, threads_per_block>>>(array, elements);
+    checkCuda(cudaDeviceSynchronize());
+}
+
+// float to half: each thread whose global index is below `elements` converts
+// one float of `input` with __float2half_rn and stores it in `output`.
+__global__ void f2hKernel(const float * const __restrict__ input,
+                          const unsigned elements,
+                          half * const __restrict__ output) {
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= elements)
+        return;
+    output[tid] = __float2half_rn(input[tid]);
+}
+
+// Host wrapper for f2hKernel: launches enough 512-thread blocks to convert
+// `elements` floats from `input` into halfs in `output`, then waits for the
+// device to finish.
+void f2h(const float * const __restrict__ input,
+         const unsigned elements,
+         half * const __restrict__ output) {
+    const unsigned threads_per_block = 512;
+    const unsigned blocks = (elements + (threads_per_block - 1)) / threads_per_block;
+    f2hKernel<<<blocks, threads_per_block>>>(input, elements, output);
+    checkCuda(cudaDeviceSynchronize());
+}
+
+// half to float: each thread whose global index is below `elements` converts
+// one half of `input` with __half2float and stores it in `output`.
+__global__ void h2fKernel(const half * const __restrict__ input,
+                          const unsigned elements,
+                          float * const __restrict__ output) {
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+    if (tid >= elements)
+        return;
+    output[tid] = __half2float(input[tid]);
+}
+
+// Host wrapper for h2fKernel: launches enough 512-thread blocks to convert
+// `elements` halfs from `input` into floats in `output`, then waits for the
+// device to finish.
+void h2f(const half * const __restrict__ input,
+         const unsigned elements,
+         float * const __restrict__ output) {
+    const unsigned threads_per_block = 512;
+    const unsigned blocks = (elements + (threads_per_block - 1)) / threads_per_block;
+    h2fKernel<<<blocks, threads_per_block>>>(input, elements, output);
+    checkCuda(cudaDeviceSynchronize());
+}
+
+// Times an FP32 GEMM (c = a * b) through cublasGemmEx with tensor-op math
+// enabled and prints the average time per call.
+//
+// a, b, c are passed straight to cuBLAS with leading dimensions num_rows_a,
+// num_rows_b and num_rows_a respectively; the multiplication is repeated 10
+// times between two CUDA events. The cuBLAS handle and the events created
+// here are released before returning.
+void sgemm(const float * const __restrict__ a,
+           const unsigned num_rows_a,
+           const unsigned num_cols_a,
+           const float * const __restrict__ b,
+           const unsigned num_rows_b,
+           const unsigned num_cols_b,
+           float * const __restrict__ c) {
+    const unsigned iterations = 10;
+    float kernel_time;
+    cudaEvent_t start;
+    cudaEvent_t stop;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+
+    cublasHandle_t handle;
+    checkCublas(cublasCreate(&handle));
+
+    // Enable Tensor Cores
+    checkCublas(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH));
+
+    const float alpha_ = 1.0;
+    const float beta_  = 0.0;
+    const float *alpha = &alpha_;
+    const float *beta  = &beta_;
+
+    cudaEventRecord(start, 0);
+    for (unsigned i = 0; i < iterations; i++) {
+        checkCublas(cublasGemmEx(handle,
+                                 CUBLAS_OP_N,
+                                 CUBLAS_OP_N,
+                                 // Dimensions
+                                 num_rows_a,
+                                 num_cols_b,
+                                 num_cols_a,
+                                 alpha,
+                                 // A
+                                 a,
+                                 CUDA_R_32F,
+                                 num_rows_a,
+                                 // B
+                                 b,
+                                 CUDA_R_32F,
+                                 num_rows_b,
+                                 beta,
+                                 // C
+                                 c,
+                                 CUDA_R_32F,
+                                 num_rows_a,
+                                 // Compute precision and algorithm
+                                 CUDA_R_32F,
+                                 CUBLAS_GEMM_DEFAULT_TENSOR_OP));
+    }
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&kernel_time, start, stop);
+
+    std::cout << "FP32 GEMM: " << std::to_string(kernel_time / iterations) << " ms\n";
+
+    // Release the resources created above so repeated calls do not leak
+    checkCublas(cublasDestroy(handle));
+    checkCuda(cudaEventDestroy(start));
+    checkCuda(cudaEventDestroy(stop));
+}
+
+// Times an FP16 GEMM (cf = af * bf) and prints a timing breakdown.
+//
+// The float inputs are converted to half in temporary managed buffers
+// (f2h), multiplied 10 times through cublasGemmEx with tensor-op math, and the
+// half result is converted back to float into cf (h2f). The printed total is
+// conversion-to-half time + average kernel time + conversion-to-float time.
+// All temporary buffers, the cuBLAS handle and the CUDA events created here
+// are released before returning.
+void hgemm(const float * const __restrict__ af,
+           const unsigned num_rows_a,
+           const unsigned num_cols_a,
+           const float * const __restrict__ bf,
+           const unsigned num_rows_b,
+           const unsigned num_cols_b,
+           float * const __restrict__ cf) {
+    const unsigned iterations = 10;
+
+    const unsigned num_elements_a = num_rows_a * num_cols_a;
+    const unsigned num_elements_b = num_rows_b * num_cols_b;
+    const unsigned num_elements_c = num_rows_a * num_cols_b;
+
+    float to_fp16_time;
+    float to_fp32_time;
+    float kernel_time;
+    float total_time;
+
+    cudaEvent_t start;
+    cudaEvent_t stop;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+
+    // Half-precision working copies of A, B and C
+    half *a;
+    half *b;
+    half *c;
+
+    checkCuda(cudaMallocManaged(&a, sizeof(half) * num_elements_a));
+    checkCuda(cudaMallocManaged(&b, sizeof(half) * num_elements_b));
+    checkCuda(cudaMallocManaged(&c, sizeof(half) * num_elements_c));
+
+    init(a, num_elements_a);
+    init(b, num_elements_b);
+    init(c, num_elements_c);
+
+    // Convert floats to halfs
+    cudaEventRecord(start, 0);
+    f2h(af, num_elements_a, a);
+    f2h(bf, num_elements_b, b);
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&to_fp16_time, start, stop);
+
+    cublasHandle_t handle;
+    checkCublas(cublasCreate(&handle));
+    checkCublas(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH));
+
+    const half alpha_ = cpu_float2half_rn(1.0);
+    const half beta_  = cpu_float2half_rn(0.0);
+    const half *alpha = &alpha_;
+    const half *beta  = &beta_;
+
+    cudaEventRecord(start, 0);
+    for (unsigned i = 0; i < iterations; i++) {
+        checkCublas(cublasGemmEx(handle,
+                                 CUBLAS_OP_N,
+                                 CUBLAS_OP_N,
+                                 // Dimensions
+                                 num_rows_a,
+                                 num_cols_b,
+                                 num_cols_a,
+                                 alpha,
+                                 // A
+                                 a,
+                                 CUDA_R_16F,
+                                 num_rows_a,
+                                 // B
+                                 b,
+                                 CUDA_R_16F,
+                                 num_rows_b,
+                                 beta,
+                                 // C
+                                 c,
+                                 CUDA_R_16F,
+                                 num_rows_a,
+                                 // Compute precision and algorithm
+                                 CUDA_R_16F,
+                                 CUBLAS_GEMM_DEFAULT_TENSOR_OP));
+    }
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&kernel_time, start, stop);
+
+    // Convert the half result back to floats
+    cudaEventRecord(start, 0);
+    h2f(c, num_elements_c, cf);
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&to_fp32_time, start, stop);
+
+    total_time = to_fp16_time + (kernel_time / iterations) + to_fp32_time;
+    std::cout << "FP16 GEMM: " << std::to_string(total_time) << " ms\n";
+    std::cout << "\tTo FP16: " << std::to_string(to_fp16_time) << " ms\n";
+    std::cout << "\tKernel : " << std::to_string(kernel_time / iterations) << " ms\n";
+    std::cout << "\tTo FP32: " << std::to_string(to_fp32_time) << " ms\n";
+
+    // Release everything created above so repeated calls do not leak.
+    // h2f has already synchronized the device, so the buffers are idle.
+    checkCublas(cublasDestroy(handle));
+    checkCuda(cudaFree(a));
+    checkCuda(cudaFree(b));
+    checkCuda(cudaFree(c));
+    checkCuda(cudaEventDestroy(start));
+    checkCuda(cudaEventDestroy(stop));
+}
+
+
+/*int main() {
+    const unsigned num_rows_a = 5000 * 14 * 14;
+    const unsigned num_cols_a = 800;
+    const unsigned num_rows_b = num_cols_a;
+    const unsigned num_cols_b = 64;
+
+    const unsigned num_elements_a = num_rows_a * num_cols_a;
+    const unsigned num_elements_b = num_rows_b * num_cols_b;
+    const unsigned num_elements_c = num_rows_a * num_cols_b;
+
+    float *a;
+    float *b;
+    float *c;
+
+    checkCuda(cudaMallocManaged(&a, sizeof(float) * num_elements_a));
+    checkCuda(cudaMallocManaged(&b, sizeof(float) * num_elements_b));
+    checkCuda(cudaMallocManaged(&c, sizeof(float) * num_elements_c));
+
+    init(a, num_elements_a);
+    init(b, num_elements_b);
+    init(c, num_elements_c);
+
+    // FP32
+    sgemm(a, num_rows_a, num_cols_a, b, num_rows_b, num_cols_b, c);
+    printArray(c, 16);
+
+    // FP16
+    hgemm(a, num_rows_a, num_cols_a, b, num_rows_b, num_cols_b, c);
+    printArray(c, 16);
+
+    checkCuda(cudaFree(a));
+    checkCuda(cudaFree(b));
+    checkCuda(cudaFree(c));
+
+    return 0;
+}
+*/
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h
new file mode 100644
index 0000000000000000000000000000000000000000..35cca55ac2a8b8e60f30d0e9b310a3b3b6edcc82
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/global_data.h
@@ -0,0 +1,39 @@
+
+#ifndef GLOBAL_DATA_HEADER
+#define GLOBAL_DATA_HEADER
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <cublas_api.h>
+
+
+// ERROR_INJECTION_ENABLED is defined (as 0) by default and removed again when
+// NO_INJECTION is defined, so after this point the macro is either defined
+// or absent - its numeric value never changes.
+#define ERROR_INJECTION_ENABLED 0
+
+#ifdef NO_INJECTION
+#undef ERROR_INJECTION_ENABLED
+#endif
+
+
+/* Data declarations */
+// NOTE(review): these are definitions rather than extern declarations, so
+// including this header from more than one translation unit would define the
+// same symbols more than once. The header also uses std::vector without
+// including <vector> itself and relies on the includer to provide it.
+
+// Library handles shared by the runtime.
+cudnnHandle_t cudnnHandle;
+cublasHandle_t cublasHandle;
+
+// Operation counters and per-operation accuracy settings.
+// NOTE(review): exact semantics are not visible in this header - confirm
+// against the code that updates them.
+int op_counter = 0;
+int total_ops = 0;
+std::vector<int> op_accuracies;
+
+// Pointer registries.
+// NOTE(review): presumably used to track allocations for later cleanup -
+// confirm against the code that fills them.
+std::vector<void*> tensors_ptr;
+std::vector<void*> host_ptr;
+std::vector<void*> obj_ptr;
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h
new file mode 100644
index 0000000000000000000000000000000000000000..01e312efe41be74b593b47e2655df29c00043e6e
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h
@@ -0,0 +1,547 @@
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+#include <ctime>
+#include <cfloat>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <cublas_api.h>
+#include <cuda_fp16.h>
+#include <driver_types.h>
+
+
+// Tensor runtime header files
+#include "../include/tensor_runtime.h"
+#include "../include/tensor_utils.cu"
+#include "../include/debug.h"
+#include "../include/profiling.h"
+#include "../include/global_data.h"
+#include "../include/tensor.h"
+#include "../include/fp16_gemm.cu"
+
+
+
+/* Matrix multiply (lhs x rhs) computed in half precision with cuBLAS.
+ *
+ * Both operands are cast to Tensor*, converted from float to half into
+ * temporary tensors, multiplied with cublasGemmEx, and the half result is
+ * converted back to float.
+ *
+ * lhs_ptr: Tensor whose first dimension is used as 'm'; all remaining
+ *          dimensions are flattened into 'k'. Indexed as a 4D tensor.
+ * rhs_ptr: Tensor whose last dimension is 'n' and second-to-last must equal
+ *          'k'. Indexed as a 4D tensor.
+ * Returns a newly created float tensor of dimensions (m, n, 1, 1); the
+ * temporary half tensors are released before returning.
+ */
+void* tensorHalfGemm(void* lhs_ptr, void* rhs_ptr){
+
+  INFO("*** TensorHalfGemm \n");
+  profileEvent("tensorHalfGemm");
+
+  Tensor* lhs = (Tensor*) lhs_ptr;
+  Tensor* rhs = (Tensor*) rhs_ptr;
+
+  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
+  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
+
+  // The conversion kernels below read gpu_data, so the operands are copied
+  // to the device first.
+  hostToDeviceCopy(lhs);
+  hostToDeviceCopy(rhs);
+
+  size_t* lhs_dims = lhs->dims.dim_sizes;
+  size_t* rhs_dims = rhs->dims.dim_sizes;
+
+  profileEvent("F2H_start");
+
+  // Half-precision shadows of both operands, same dimensions as the originals
+  Tensor* lhs_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+					      lhs_dims[0], lhs_dims[1], lhs_dims[2],
+					      lhs_dims[3]);
+  changeTensorPlacement(lhs_half, DEVICE);
+
+  Tensor* rhs_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+					      rhs_dims[0], rhs_dims[1], rhs_dims[2],
+					      rhs_dims[3]);
+  changeTensorPlacement(rhs_half, DEVICE);
+
+
+  f2h((float*) lhs->gpu_data, lhs->num_elems, (half*) lhs_half->gpu_data);
+  f2h((float*) rhs->gpu_data, rhs->num_elems, (half*) rhs_half->gpu_data);
+
+  profileEvent("F2H_end");
+
+
+  // 'm' holds the batch dimension - assuming NCHW format Tensors
+  int m = lhs->dims.dim_sizes[0];
+  // The rhs last dimension must contain the neurons
+  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
+  int k = 1;
+
+  // Flatten every lhs dimension after the batch dimension into 'k'
+  for (int j = 1 ; j < lhs->dims.num_dims; j++){
+    k = k * lhs->dims.dim_sizes[j]; // input neurons
+  }
+
+  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
+  // Dimension-note: Check if k is same across the two tensors
+  INFO("m = %d, n = %d, k = %d \n", m, n, k);
+  // NOTE(review): whether ERROR aborts is decided in debug.h; if it does not,
+  // the GEMM below still runs with mismatched dimensions - confirm.
+  if(rhs_k != k){
+    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
+  }
+
+  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
+  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+					    m, n, 1, 1);
+  Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						 m, n, 1, 1);
+
+  // Changing output tensor placement from host to device
+  changeTensorPlacement(output_half, DEVICE);
+  changeTensorPlacement(output, DEVICE);
+
+  // Enabling tensor core ops for efficient half precision
+
+  //--- checkCudaErrors(cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH));
+
+  // INFO: cuBlas uses column-major format
+  // INFO: The leading dimension is just the FIRST Dimension
+  // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects
+  // Scaling factors are passed as half because the compute type below is CUDA_R_16F
+  const __half alf = approx_float_to_half(1.0);
+  const __half bet = approx_float_to_half(0.0);
+  const __half *alpha_half = &alf;
+  const __half *beta_half = &bet;
+
+
+  // rhs is passed as the first matrix (n x k, leading dim n) and lhs as the
+  // second (k x m, leading dim k), producing an n x m column-major result.
+  checkCudaErrors(cublasGemmEx(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
+			       n, m, k,
+			       alpha_half,
+			       (__half*) rhs_half->gpu_data, CUDA_R_16F, n,
+			       (__half*) lhs_half->gpu_data, CUDA_R_16F, k,
+			       beta_half,
+			       (__half*) output_half->gpu_data, CUDA_R_16F, n,
+			       CUDA_R_16F, CUBLAS_GEMM_DEFAULT_TENSOR_OP) );
+
+
+  // Convert the half result back into the float output tensor
+  profileEvent("H2F_start");
+  h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data);
+  profileEvent("H2F_end");
+
+
+  profileEvent("tensorHalfGemm_end");
+
+
+  // Only the temporaries are released; 'output' is handed to the caller
+  freeTensor(lhs_half);
+  freeTensor(rhs_half);
+  freeTensor(output_half);
+
+
+  return output;
+}
+
+
+
+
+
+
+
+// FIXIT: Generalize all of the routines for types {half, float, double}
+/* 2D convolution computed in half precision with cuDNN.
+ *
+ * The float input and filter are converted to temporary half tensors, the
+ * convolution runs with CUDNN_DATA_HALF compute type using the
+ * IMPLICIT_PRECOMP_GEMM algorithm, and the half result is converted back.
+ *
+ * input_ptr, filter_ptr: Tensor* operands, indexed as 4D tensors.
+ * vertical_pad, horizontal_pad, vertical_stride, horizontal_stride:
+ *   forwarded to cudnnSetConvolution2dDescriptor.
+ * conv_mode: 0 selects CUDNN_CONVOLUTION, 1 selects CUDNN_CROSS_CORRELATION.
+ *   Any other value is reported through ERROR and falls back to
+ *   cross-correlation instead of using an uninitialized mode.
+ * compute_precision: currently unused by this routine.
+ * Returns a newly created tensor with the input's data type holding the
+ * convolution output. The workspace buffer, the convolution descriptor and
+ * the temporary half tensors are released before returning.
+ */
+void* tensorHalfConvolution(void* input_ptr, void* filter_ptr,
+			    int vertical_pad, int horizontal_pad,
+			    int vertical_stride, int horizontal_stride,
+			    int conv_mode, int compute_precision){
+
+  INFO("*** TensorHConvolution \n");
+  profileEvent("tensorHalfConv");
+
+  Tensor* input = (Tensor*) input_ptr;
+  Tensor* filter = (Tensor*) filter_ptr;
+
+  cudnnConvolutionDescriptor_t convDesc;
+  cudnnConvolutionFwdAlgo_t convAlgo;
+  // Start from a defined value so an unexpected conv_mode can never leave
+  // 'mode' uninitialized.
+  cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
+  if(conv_mode == 0){
+    mode = CUDNN_CONVOLUTION;
+  }
+  else if(conv_mode != 1){
+    ERROR("Unsupported conv_mode = %d \n", conv_mode);
+  }
+
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f, beta = 0.0f;
+  // NOTE: compute in half precision
+  cudnnDataType_t computeType = CUDNN_DATA_HALF;
+
+  // NOTE: Moving inputs to GPU global memory
+  hostToDeviceCopy(input);
+  hostToDeviceCopy(filter);
+
+
+  /***** CONVERSIONS from FP32 to FP16 - on the GPU */
+  size_t* input_dims = input->dims.dim_sizes;
+  size_t* filter_dims = filter->dims.dim_sizes;
+
+
+  profileEvent("F2H_start");
+
+  Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						input_dims[0], input_dims[1],
+						input_dims[2], input_dims[3]);
+  changeTensorPlacement(input_half, DEVICE);
+  Tensor* filter_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						 filter_dims[0], filter_dims[1],
+						 filter_dims[2], filter_dims[3]);
+  changeTensorPlacement(filter_half, DEVICE);
+
+
+  f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data);
+  f2h((float*) filter->gpu_data, filter->num_elems, (half*) filter_half->gpu_data);
+
+  profileEvent("F2H_end");
+
+  /******* END OF INPUT DATA CONVERSIONS*/
+
+
+
+  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
+  // FIXIT: Think if upscaling values need to be configurable?
+  // IMP-FIXIT:  CUDNN Cross correlation is only used in the Lenet context
+  // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used?
+  checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc,
+					     vertical_pad, horizontal_pad, // conv padding
+					     vertical_stride, horizontal_stride, // conv strides
+					     1, 1, // upscaling values
+					     mode, // mode is configurable
+					     computeType)); // defines compute precision
+
+  int n, c, h, w; // output dimensions
+  // Find dimension of convolution output (the float descriptors carry the
+  // same dimensions as their half counterparts)
+  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc,
+						   input->tensor_desc,
+						   filter->filter_desc,
+						   &n, &c, &h, &w));
+  DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
+
+
+  Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
+					    CUDNN_TENSOR_NCHW, n, c, h, w);
+  // FIXIT: more checks for data types needed
+  Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF,
+						 CUDNN_TENSOR_NCHW, n, c, h, w);
+
+  // NOTE: Changing output tensor placement from host to device
+  changeTensorPlacement(output, DEVICE);
+  // NOTE: Necessary to insert the above call for every output tensor
+
+  // dim_sizes holds size_t values; cast so they match the %d conversions
+  DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %d, H = %d, W = %d, C = %d \n",
+	output->data_type, output->data_format,
+	(int) output->dims.dim_sizes[0], (int) output->dims.dim_sizes[1],
+	(int) output->dims.dim_sizes[2], (int) output->dims.dim_sizes[3]);
+
+  if(convDesc == NULL || input->tensor_desc == NULL ||
+     filter->filter_desc == NULL || output->tensor_desc == NULL)
+    ERROR("NULL descriptor! \n");
+
+
+  // NOTE: The following algo works with TRUE half precision
+  convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+
+  size_t workspace_size;
+  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle,
+						     input_half->tensor_desc,
+						     filter_half->filter_desc,
+						     convDesc,
+						     output_half->tensor_desc,
+						     convAlgo,
+						     &workspace_size));
+
+  // Allocating memory for the convolution workspace
+  DEBUG("workspace size = %zu \n", workspace_size);
+  void* workspace;
+  checkCudaErrors(cudaMalloc(&workspace, workspace_size));
+
+
+  checkCUDNN(cudnnConvolutionForward(cudnnHandle,
+				     &alpha,
+				     input_half->tensor_desc,
+				     input_half->gpu_data,
+				     filter_half->filter_desc,
+				     filter_half->gpu_data,
+				     convDesc, convAlgo, workspace, workspace_size,
+				     &beta,
+				     output_half->tensor_desc,
+				     output_half->gpu_data));
+
+
+  profileEvent("H2F_start");
+
+  // NOTE: Transforming half precision output to single precision
+  h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data);
+
+  profileEvent("H2F_end");
+
+  profileEvent("tensorHalfConv_end");
+
+
+  // Release per-call resources after the last profiling event so the cleanup
+  // does not show up in the recorded timings. Previously the workspace and
+  // the descriptor were leaked on every call.
+  checkCudaErrors(cudaFree(workspace));
+  checkCUDNN(cudnnDestroyConvolutionDescriptor(convDesc));
+
+  freeTensor(input_half);
+  freeTensor(filter_half);
+  freeTensor(output_half);
+
+  return output;
+}
+
+
+
+
+
+
+/* Max pooling computed in half precision with cuDNN.
+ *
+ * input_ptr: Tensor* operand, indexed as a 4D tensor.
+ * poolFunction: currently ignored; CUDNN_POOLING_MAX is always used.
+ * window_height, window_width, vertical_pad, horizontal_pad,
+ * vertical_stride, horizontal_stride: pooling geometry, forwarded to
+ *   cudnnSetPooling2dDescriptor and used to compute the output size.
+ * Returns a newly created float tensor of dimensions (n, c, h, w) where
+ * h = (H + 2*vertical_pad - window_height) / vertical_stride + 1 and
+ * w is computed the same way from the horizontal parameters.
+ */
+void* tensorHalfPooling(void* input_ptr,
+			int poolFunction,
+			int window_height, int window_width,
+			int vertical_pad, int horizontal_pad,
+			int vertical_stride, int horizontal_stride){
+
+
+  INFO("*** TensorHalfPooling \n");
+  profileEvent("tensorHalfPooling");
+
+  Tensor* input = (Tensor*) input_ptr;
+  size_t* input_dims = input->dims.dim_sizes;
+
+  // The input must be on the device before the float-to-half conversion
+  // reads input->gpu_data; previously this copy ran after the conversion.
+  hostToDeviceCopy(input);
+
+  /** floating point to half conversion */
+  profileEvent("F2H_start");
+  Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						input_dims[0], input_dims[1],
+						input_dims[2], input_dims[3]);
+  changeTensorPlacement(input_half, DEVICE);
+
+  f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data);
+
+  profileEvent("F2H_end");
+  //*** end of data conversions
+
+  cudnnPoolingDescriptor_t poolDesc;
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f, beta = 0.0f;
+
+  checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc));
+
+  // Output dimensions: batch and channels are unchanged, spatial extents
+  // follow the usual (in + 2*pad - window) / stride + 1 rule.
+  int n = input->dims.dim_sizes[0];
+  int c = input->dims.dim_sizes[1];
+  int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride;
+  h = h + 1;
+  int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride;
+  w = w + 1;
+
+  DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
+
+  // FIXIT: Don't be specific to floats
+  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w);
+  // Changing output tensor placement from host to device
+  changeTensorPlacement(output, DEVICE);
+
+  // FIXIT: more checks for data types needed
+  Tensor* output_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF,
+						 CUDNN_TENSOR_NCHW, n, c, h, w);
+
+
+
+  // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format
+  // FIXIT: Is this setTensor even needed?
+  checkCUDNN(cudnnSetTensor4dDescriptor(output_half->tensor_desc,
+					CUDNN_TENSOR_NCHW,
+					CUDNN_DATA_HALF,
+					n, c,
+					h, w));
+
+  // FIXIT: Make the pool function (max, min, avg) configurable
+  checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc,
+					 CUDNN_POOLING_MAX,
+					 CUDNN_PROPAGATE_NAN,
+					 window_height, window_width,
+					 vertical_pad, horizontal_pad,
+					 vertical_stride, horizontal_stride));
+
+  checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input_half->tensor_desc,
+				 input_half->gpu_data, &beta, output_half->tensor_desc, output_half->gpu_data));
+
+
+
+  profileEvent("H2F_start");
+
+  // NOTE: Transforming half precision output to single precision
+  h2f((half*) output_half->gpu_data, output->num_elems, (float*) output->gpu_data);
+
+  profileEvent("H2F_end");
+
+  profileEvent("tensorHalfPooling_end", true);
+
+
+  // The descriptor was previously leaked on every call
+  checkCUDNN(cudnnDestroyPoolingDescriptor(poolDesc));
+
+  freeTensor(input_half);
+  freeTensor(output_half);
+
+
+  return output;
+}
+
+
+
+
+
+/* In-place clipped ReLU computed in half precision with cuDNN.
+ *
+ * input_ptr: Tensor* operand, indexed as a 4D tensor; its float device data
+ *   is overwritten with the activation result.
+ * min: not used. cuDNN's clipped ReLU takes a single coefficient (the upper
+ *   threshold), so a lower bound cannot be passed through this call.
+ * max: clipping ceiling, passed to cuDNN as the activation coefficient.
+ *   (It was previously hard-coded to 2.0 regardless of this argument.)
+ * Returns input_ptr's tensor.
+ */
+void* tensorHalfRelu2(void* input_ptr, float min, float max){
+
+  INFO("*** TensorClippedRelu \n");
+  profileEvent("tensorHalfClippedRelu");
+
+  Tensor* input = (Tensor*) input_ptr;
+  size_t* input_dims = input->dims.dim_sizes;
+
+  cudnnActivationDescriptor_t reluDesc;
+  float alpha = 1.0f, beta = 0.0f;
+  hostToDeviceCopy(input);
+
+
+  //**** Floating point to half conversions
+  profileEvent("F2H_start");
+  Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						input_dims[0], input_dims[1],
+						input_dims[2], input_dims[3]);
+  changeTensorPlacement(input_half, DEVICE);
+
+  // Data conversion from float to half
+  f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data);
+
+  profileEvent("F2H_end");
+  /*** End of data type conversion **/
+
+
+  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
+
+  // The last argument is the clipping threshold for CLIPPED_RELU
+  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU,
+					  CUDNN_PROPAGATE_NAN, max));
+
+  // Same descriptor/buffer for source and destination: the half copy is
+  // updated in place.
+  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
+				    input_half->tensor_desc, input_half->gpu_data, &beta,
+				    input_half->tensor_desc, input_half->gpu_data));
+
+
+  profileEvent("H2F_start");
+  // NOTE: Transforming half precision output to single precision
+  h2f((half*) input_half->gpu_data, input->num_elems, (float*) input->gpu_data);
+
+  profileEvent("H2F_end");
+
+  profileEvent("tensorHalfClippedRelu_end");
+
+
+  // The descriptor was previously leaked on every call
+  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
+
+  freeTensor(input_half);
+
+  return input;
+}
+
+
+
+
+
+
+/* In-place tanh activation computed in half precision with cuDNN.
+ *
+ * input_ptr: Tensor* operand, indexed as a 4D tensor; its float device data
+ *   is overwritten with the activation result.
+ * Returns input_ptr's tensor.
+ */
+void* tensorHalfTanh(void* input_ptr){
+
+  INFO("*** TensorHalfTanh \n");
+  profileEvent("tensorHalfTanh");
+
+
+  Tensor* input = (Tensor*) input_ptr;
+  size_t* input_dims = input->dims.dim_sizes;
+
+  cudnnActivationDescriptor_t tanhDesc;
+  float alpha = 1.0f, beta = 0.0f;
+  hostToDeviceCopy(input);
+
+
+  //**** Data conversion from float to half
+  profileEvent("F2H_start");
+  Tensor* input_half = (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+						input_dims[0], input_dims[1],
+						input_dims[2], input_dims[3]);
+  changeTensorPlacement(input_half, DEVICE);
+
+  f2h((float*) input->gpu_data, input->num_elems, (half*) input_half->gpu_data);
+
+  profileEvent("F2H_end");
+  /**** End of data type conversion ****/
+
+
+  checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc));
+
+  checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH,
+					  CUDNN_PROPAGATE_NAN, 0.0));
+
+  // Same descriptor/buffer for source and destination: the half copy is
+  // updated in place.
+  checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha,
+				    input_half->tensor_desc, input_half->gpu_data, &beta,
+				    input_half->tensor_desc, input_half->gpu_data));
+
+  profileEvent("H2F_start");
+  // NOTE: Transforming half precision output to single precision
+  h2f((half*) input_half->gpu_data, input->num_elems, (float*) input->gpu_data);
+  profileEvent("H2F_end");
+
+  profileEvent("tensorHalfTanh_end");
+
+
+  // The descriptor was previously leaked on every call
+  checkCUDNN(cudnnDestroyActivationDescriptor(tanhDesc));
+
+  freeTensor(input_half);
+
+  return input;
+}
+
+
+
+/* In-place addition x = x + bias, carried out in half precision via cuDNN.
+ *
+ * x_ptr, bias_ptr: Tensor* operands, both indexed as 4D tensors.
+ * The float device data of x is overwritten with the sum; x is returned.
+ */
+void* tensorHalfAdd(void* x_ptr, void* bias_ptr){
+
+  Tensor* x = (Tensor*) x_ptr;
+  Tensor* bias = (Tensor*) bias_ptr;
+
+  INFO("*** TensorHalfAdd \n");
+  profileEvent("tensorHalfAdd");
+
+  // Both the bias and the accumulated tensor are scaled by 1.0
+  const float unit_scale = 1.0f;
+
+  hostToDeviceCopy(x);
+  hostToDeviceCopy(bias);
+
+
+  //**** Data conversion from float to half
+  profileEvent("F2H_start");
+
+  Tensor* x_fp16 =
+    (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+                             x->dims.dim_sizes[0], x->dims.dim_sizes[1],
+                             x->dims.dim_sizes[2], x->dims.dim_sizes[3]);
+  changeTensorPlacement(x_fp16, DEVICE);
+
+  Tensor* bias_fp16 =
+    (Tensor*) create4DTensor(CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
+                             bias->dims.dim_sizes[0], bias->dims.dim_sizes[1],
+                             bias->dims.dim_sizes[2], bias->dims.dim_sizes[3]);
+  changeTensorPlacement(bias_fp16, DEVICE);
+
+  f2h((float*) x->gpu_data, x->num_elems, (half*) x_fp16->gpu_data);
+  f2h((float*) bias->gpu_data, bias->num_elems, (half*) bias_fp16->gpu_data);
+
+  profileEvent("F2H_end");
+  /*** End of data type conversions ****/
+
+
+  // FIXIT: routine fails for 3D tensors
+  checkCUDNN(cudnnAddTensor(cudnnHandle,
+                            &unit_scale, bias_fp16->tensor_desc, bias_fp16->gpu_data,
+                            &unit_scale, x_fp16->tensor_desc, x_fp16->gpu_data));
+
+
+  // Write the half-precision sum back into x's float buffer
+  profileEvent("H2F_start");
+  h2f((half*) x_fp16->gpu_data, x->num_elems, (float*) x->gpu_data);
+  profileEvent("H2F_end");
+
+  profileEvent("tensorHalfAdd_end");
+
+
+  freeTensor(x_fp16);
+  freeTensor(bias_fp16);
+
+  return x;
+}
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ea0b28545cbd81a59735d0b06c839a7f991ed35
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h
@@ -0,0 +1,229 @@
+
+
+#ifndef OP_OVERHEADS_HEADER
+#define OP_OVERHEADS_HEADER
+
+
+#include <math.h>
+#include <sstream>
+#include "tensor.h"
+
+
+// Divisor applied by scaleDownComps() to every raw computation count.
+float scale_down_factor = 10000.0;
+// Accumulates the tab/newline separated report text until dump_result()
+// writes it out and resets it.
+std::string result_str = "";
+
+
+// TODO: Every routine needs testing
+
+// Divides a raw computation count by scale_down_factor; the quotient is
+// narrowed to float on return.
+static float scaleDownComps(double total_comps){
+
+  const double scaled = total_comps / scale_down_factor;
+  return scaled;
+}
+
+// private function
+// Scales a computation count down according to an error level.
+//   total_comps: computation count to scale.
+//   error_scale: error level; larger values give a lower cost.
+//   factor_type: 1 = logarithmic, divide by log2(error_scale + 3)
+//                2 = linear,      divide by (error_scale + 1)
+//                3 = quadratic,   divide by (error_scale + 1)^2
+//                any other value returns total_comps unscaled (previously
+//                an uninitialized value was returned).
+// Returns the scaled count narrowed to float.
+static float getScaledComps(double total_comps, int error_scale, int factor_type){
+
+  // Unscaled cost is the defined fallback for unknown factor types
+  double scaled_comps = total_comps;
+
+  switch(factor_type){
+  case 1: {
+    // Logarithmic error factor scaling - higher error, lower cost
+    float error_factor = log2((float) error_scale + 3);
+    scaled_comps = total_comps / error_factor;
+    break;
+  }
+  case 2:
+    // Linear error factor scaling
+    scaled_comps = total_comps / (error_scale + 1);
+    break;
+  case 3: {
+    // Quadratic error factor scaling (scaling down); squared in double so a
+    // large error_scale cannot overflow int
+    double quad_factor = error_scale + 1.0;
+    scaled_comps = total_comps / (quad_factor * quad_factor);
+    break;
+  }
+  default:
+    break;
+  }
+
+  return scaled_comps;
+}
+
+
+// Appends one value, in fixed-point notation and followed by a tab, to the
+// shared result_str report.
+static void addNormToResult(float comps){
+
+  std::ostringstream cell;
+  cell << std::fixed << comps << "\t";
+
+  result_str += cell.str();
+}
+
+
+
+// Appends one report row to result_str: the four values in fixed-point
+// notation, tab separated, terminated by a newline.
+static void addCompsToResult(float total_comps, float opt_comps1, float opt_comps2, float opt_comps3){
+
+  std::ostringstream row;
+  row << std::fixed
+      << total_comps << "\t"
+      << opt_comps1 << "\t"
+      << opt_comps2 << "\t"
+      << opt_comps3 << "\n";
+
+  result_str += row.str();
+}
+
+
+// Records one report row for an operation: the scaled-down computation count
+// followed by its log-, linear- and quadratic-scaled variants for the given
+// error level.
+void dumpCompOverheads(double total_comps, int error_scale){
+
+  // scaleDownComps() returns float; the value is widened back to double
+  // before it is fed to the scaling helper.
+  const double base_cost = scaleDownComps(total_comps);
+
+  const float log_cost = getScaledComps(base_cost, error_scale, 1);       // Log scaling
+  const float linear_cost = getScaledComps(base_cost, error_scale, 2);    // Linear scaling
+  const float quadratic_cost = getScaledComps(base_cost, error_scale, 3); // Quadratic scaling
+
+  addCompsToResult(base_cost, log_cost, linear_cost, quadratic_cost);
+}
+
+
+
+// Estimates the computation count of a convolution and records it.
+// The estimate is batch * (H / vertical_stride) * (W / horizontal_stride)
+// multiplied by the product of all four filter dimensions; the spatial
+// divisions are integer divisions.
+void add_conv_overheads(void* input_ptr, void* filter_ptr,
+			int vertical_stride, int horizontal_stride,
+			int error_scale){
+
+  const size_t* in_dims = ((Tensor*) input_ptr)->dims.dim_sizes;
+  const size_t* filt_dims = ((Tensor*) filter_ptr)->dims.dim_sizes;
+
+  // Work per output position: product of the filter dimensions
+  double kernel_comps = filt_dims[0] * filt_dims[1] * filt_dims[2] * filt_dims[3];
+
+  double N_in = in_dims[0]; // batch Dimension
+  double H_in = in_dims[2] / vertical_stride;
+  double W_in = in_dims[3] / horizontal_stride;
+
+  dumpCompOverheads(N_in * H_in * W_in * kernel_comps, error_scale);
+}
+
+
+// Estimates the computation count of a matrix multiply as m * n * k and
+// records it. m is the first lhs dimension, n the last rhs dimension, and k
+// the second-to-last rhs dimension, which must equal the product of all lhs
+// dimensions after the first; the process aborts when it does not.
+void add_gemm_overheads(void* lhs_ptr, void* rhs_ptr, int error_scale){
+
+  Tensor* lhs = (Tensor*) lhs_ptr;
+  Tensor* rhs = (Tensor*) rhs_ptr;
+
+  const int rhs_rank = rhs->dims.num_dims;
+
+  int m = lhs->dims.dim_sizes[0];
+  // The rhs last dimension must contain the neurons
+  int n = rhs->dims.dim_sizes[rhs_rank - 1]; // output neurons
+  int rhs_k = rhs->dims.dim_sizes[rhs_rank - 2];
+
+  // Flattening the dimensions after the batch dimension
+  int k = 1;
+  for (int dim = 1; dim < lhs->dims.num_dims; ++dim){
+    k = k * lhs->dims.dim_sizes[dim]; // input neurons
+  }
+
+  // Dimension-note: k must be the same across the two tensors
+  if(k != rhs_k){
+    printf("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
+    abort();
+  }
+
+  double total_comps = (double) m * (double) n * (double) rhs_k;
+  dumpCompOverheads(total_comps, error_scale);
+}
+
+
+// Records the cost of a bias addition: one computation per tensor element.
+void add_bias_overheads(void* input_ptr, int error_scale){
+
+  const Tensor* tensor = (Tensor*) input_ptr;
+
+  // num_elems (size_t) is converted to double at the call
+  dumpCompOverheads(tensor->num_elems, error_scale);
+}
+
+
+// Records the cost of a ReLU: one computation per tensor element.
+void add_relu_overheads(void* input_ptr, int error_scale){
+
+  const Tensor* tensor = (Tensor*) input_ptr;
+
+  // num_elems (size_t) is converted to double at the call
+  dumpCompOverheads(tensor->num_elems, error_scale);
+}
+
+
+// Estimates the computation count of a pooling operation and records it.
+// The estimate is N * C * (H / stride_size) * (W / stride_size) *
+// kernel_size^2, where H and W are the last two input dimensions and the
+// divisions are floating-point.
+// Returns the estimate after scale-down, i.e. the value written to the first
+// column of the report row. (The function previously fell off the end
+// without returning, which is undefined behaviour for a non-void function.)
+float add_pool_overheads(void* input_ptr, int kernel_size,
+			 int stride_size, int error_scale){
+
+  Tensor* input = (Tensor*) input_ptr;
+
+  int num_dims = input->dims.num_dims;
+  double H = input->dims.dim_sizes[num_dims-2];
+  double W = input->dims.dim_sizes[num_dims-1];
+  double C = input->dims.dim_sizes[1]; // channel dimension
+  double N = input->dims.dim_sizes[0]; // batch dimension
+
+  H = H / stride_size;
+  W = W / stride_size;
+
+  double total_comps = N * C * H * W * kernel_size * kernel_size;
+
+  dumpCompOverheads(total_comps, error_scale);
+
+  return scaleDownComps(total_comps);
+}
+
+
+void add_norms(void* norms_ptr, char* op_name, int error_value){
+
+  // Print operation name - {tensorAdd, tensorPool, tensorGemm}
+  result_str.append(op_name);
+  result_str.append("\t");
+  
+  addNormToResult(error_value);
+  
+  Norm_t* norms = (Norm_t*) norms_ptr;
+
+  addNormToResult(norms->mean_l1);
+  addNormToResult(norms->mean_l2);
+  addNormToResult(norms->orig_inf_norm);
+  
+  addNormToResult(norms->l1_norm);
+  addNormToResult(norms->l2_norm);
+  addNormToResult(norms->inf_norm);
+}
+
+
+// Writes the accumulated report text to file_name (opened with "w+") and
+// then empties the in-memory report. A failed open is reported via ERROR.
+void dump_result(char* file_name){
+
+  FILE* out = fopen(file_name, "w+");
+  if(out == NULL){
+    ERROR("Could not create file \n");
+  }
+  else{
+    fwrite(result_str.c_str(), 1, result_str.length(), out);
+    fclose(out);
+  }
+
+  result_str.clear();
+}
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h
new file mode 100644
index 0000000000000000000000000000000000000000..8eb7f92ab014c00adb9b89875c375d82546c9f38
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/profiling.h
@@ -0,0 +1,93 @@
+
+#ifndef PROFILING_HEADER
+#define PROFILING_HEADER
+
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctime>
+#include <chrono>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <random>
+#include <string>
+#include <unordered_map>
+
+
+
+/***** Profiling routines ***/
+
+// Number of times each event name has been passed to profileEvent()
+std::unordered_map<std::string, int> func_counters;
+// Text of the profile report; written to disk and cleared by stopProfiling()
+std::string profile_data = "";
+
+// Set by startProfiling()
+std::chrono::time_point<std::chrono::high_resolution_clock> start_time;
+// previous_time maintains time for the latest timed operation
+std::chrono::time_point<std::chrono::high_resolution_clock> previous_time;
+
+extern "C"{
+
+  // Records the current high-resolution clock reading in start_time.
+  void startProfiling(){
+    typedef std::chrono::high_resolution_clock profile_clock;
+    start_time = profile_clock::now();
+  }
+
+  // Writes the collected profile text to "profile_data.txt" (skipped when the
+  // file cannot be opened), then resets the profile text and event counters.
+  void stopProfiling(){
+
+    FILE* out = fopen("profile_data.txt", "w+");
+    if(out){
+      fwrite(profile_data.c_str(), 1, profile_data.length(), out);
+      fclose(out);
+    }
+
+    profile_data.clear();
+    func_counters.clear();
+  }
+
+
+  // Appends one line to the profile report for the named event.
+  //   event_name: label of the event; the line starts with the label
+  //     immediately followed by how many times it has been seen so far.
+  //   compare_previous: when true, the line also carries the time elapsed
+  //     since the previously profiled event.
+  // Line format: <name><count> TAB <absolute time in seconds>
+  //              [TAB <seconds since previous event>] NEWLINE
+  // Also updates previous_time to this event's clock reading.
+  void profileEvent(char* event_name, bool compare_previous = false){
+
+    // operator[] value-initializes a missing counter to 0, so the first
+    // occurrence of an event yields 1.
+    const int occurrence = ++func_counters[event_name];
+    // std::to_string comes from <string>, which this header includes; the
+    // earlier std::stringstream relied on <sstream> being included elsewhere.
+    const std::string event_count = std::to_string(occurrence);
+
+
+    // A default-constructed time_point is the clock's epoch, so the reported
+    // absolute time is measured from that epoch.
+    std::chrono::time_point<std::chrono::high_resolution_clock> zero_time;
+    std::chrono::time_point<std::chrono::high_resolution_clock> time_reading =
+      std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double, std::ratio<1>> current_time =
+      time_reading - zero_time;
+
+    INFO("AbsoluteTime, Event = %s, Time = %f \n", event_name, current_time.count());
+    profile_data.append(event_name);
+    profile_data.append(event_count);
+    profile_data.append("\t");
+    profile_data.append(std::to_string(current_time.count()));
+
+    if(compare_previous){
+      std::chrono::duration<double, std::ratio<1>> duration_time =
+	time_reading - previous_time;
+
+      profile_data.append("\t");
+      profile_data.append(std::to_string(duration_time.count()));
+      INFO("TimeDuration, Event = %s, Time = %f \n", event_name, duration_time.count());
+    }
+
+    profile_data.append("\n");
+
+    previous_time = time_reading; // set the previous time reading to the current profiled time
+  }
+
+}
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
new file mode 100644
index 0000000000000000000000000000000000000000..08b4369fca5fbc28fc2b3c3dbe31fa81e85e7ff6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor.h
@@ -0,0 +1,53 @@
+
+
+#ifndef TENSOR_HEADER
+#define TENSOR_HEADER
+
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <cublas_api.h>
+#include <cuda_fp16.h>
+#include <driver_types.h>
+
+
+
+// Bundle of norm values attached to an operation's report row.
+// NOTE(review): presumably these measure the difference between an original
+// and an error-injected tensor - confirm against the code that fills them.
+struct Norm_t{
+  float mean_l1;
+  float mean_l2;
+  float orig_inf_norm;
+  float l0_norm;
+  float l1_norm;
+  float l2_norm;
+  float inf_norm;
+};
+
+
+// Shape of a tensor: the number of dimensions and a pointer to an array of
+// per-dimension sizes.
+struct Dimension{
+  int num_dims;
+  size_t* dim_sizes; // presumably holds num_dims entries - TODO confirm at the allocation site
+};
+
+// Where a tensor's data currently lives (see Tensor::data_placement).
+enum data_location_t{
+  HOST,
+  DEVICE
+};
+
+
+// Runtime tensor object: element type/layout tags, cuDNN descriptors, host
+// and GPU data pointers, and size/shape bookkeeping.
+struct Tensor{
+  int data_type; // cast to cudnnDataType_t by the half-precision routines
+  int data_format; // presumably a cudnnTensorFormat_t value - TODO confirm
+  data_location_t data_placement; // Maintains the location of the tensor {host, device...} 
+  cudnnTensorDescriptor_t tensor_desc;
+  cudnnFilterDescriptor_t filter_desc; // FIXIT: Rethink if this should be in tensor struct
+  void* host_data;
+  void* gpu_data; // The pointers should not be device specific per se - TODO: Better design needed
+  size_t num_elems; // Total elements
+  size_t size_in_bytes; // Total size in bytes
+  struct Dimension dims;
+};
+
+
+#endif
+
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2216172eab78414b46814e0d457908f5584c606a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.cc
@@ -0,0 +1,114 @@
+
+#include <stdio.h>
+#include <cstdlib>
+#include <cmath>
+#include <memory>
+#include <string>
+
+
+#ifndef CUDNN_HEADER
+#define CUDNN_HEADER
+
+
+// C-linkage declarations of the tensor runtime API. Tensors are passed across
+// this boundary as opaque void* handles.
+extern "C"{
+  /****  Initialization Routine - Must be inserted at program start (in the backend)  ****/
+  void llvm_hpvm_initTensorRt(int gpuid = 0);
+  void llvm_hpvm_cleanupTensorRt();
+
+  // Routine for moving tensor data (from and to GPU,CPU)
+  void hpvm_request_tensor(void* tensor, int destination);
+
+  /****** Profiling API - defines profiling scope */
+  void startProfiling();
+  void stopProfiling();
+
+  /****** Routines for tensor creation and initialization *******/
+  void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size);
+  void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size);
+
+  // NOTE: Currently only using 4-D tensors - 2D and 3D tensors not supported for cuDNN operations
+  // NOTE: The only data format supported as of now is: CUDNN_NCHW
+  void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size, size_t dim4_size);
+  void initTensorData(void* tensor, void* data_ptr, size_t size_in_bytes);
+
+  /********** Tensor Operation API ******/
+
+  void** tensorSplit(void* tensor, int num_splits, int split_dim);
+  void* tensorConcat(void** tensors, int num_splits, int split_dim);
+
+  // NOTE: For conv_mode, only value '1' is supported
+  void* tensorConvolution(void* input, void* filter,
+			  int vertical_pad, int horizontal_pad,
+			  int vertical_stride, int horizontal_stride,
+			  int conv_mode, int compute_precision);
+  // NOTE(review): tensor_runtime.h declares the half-precision convolution as
+  // tensorHalfConvolution; confirm a tensorHConvolution symbol still exists.
+  void* tensorHConvolution(void* input, void* filter,
+			  int vertical_pad, int horizontal_pad,
+			  int vertical_stride, int horizontal_stride,
+			  int conv_mode, int compute_precision);
+
+  void* tensorPooling(void* input,
+		      int poolFunction,
+		      int window_height, int window_width,
+		      int vertical_pad, int horizontal_pad,
+		      int vertical_stride, int horizontal_stride);
+
+  void* tensorLRN(void* input, unsigned int LRN_window,
+		  double LRN_alpha, double LRN_beta, double LRN_k);
+
+
+  /* 4 different Gemm versions */
+  void* tensorGemm(void* lhs, void* rhs);
+  void* tensorGemmCPU(void* lhs, void* rhs);
+  void* tensorGemmGPU(void* lhs, void* rhs);
+  void* tensorHgemm(void* lhs, void* rhs);
+
+  
+  // NOTE: In-place operation
+  void* tensorGemmBias(void* input, void* bias);
+  // NOTE: In place operation
+  void* tensorAdd(void* x, void* bias);
+  // NOTE: In-place operation
+  void* tensorRelu(void* input);
+  // NOTE: In-place operation
+  void* tensorSoftmax(void* input);
+
+  /* Error injection API - used for accuracy tuning */
+  void* tensorAddError(void* x_ptr);  
+}
+
+
+
+// Takes the address of every runtime API function declared above and stores
+// it in an otherwise unused local; nothing is called and nothing is returned.
+// NOTE(review): presumably this exists only to force a reference to each
+// declaration so it survives in the compiled output of this file - confirm
+// with the build step that consumes tensor_runtime.cc.
+void emptyFunction(){
+
+  void* initRT = (void*) &llvm_hpvm_initTensorRt;
+  void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt;
+  void* request_tensorPtr = (void*) &hpvm_request_tensor;
+  void* startProf = (void*) &startProfiling;
+  void* stopProf = (void*) &stopProfiling;
+  void* create2Dptr = (void*) &create2DTensor;
+  void* create3Dptr = (void*) &create3DTensor;
+  void* create4Dptr = (void*) &create4DTensor;
+  void* initTensorPtr = (void*) &initTensorData;
+  void* tensorSplitPtr = (void*) &tensorSplit;
+  void* tensorConcatPtr = (void*) &tensorConcat;
+  void* tensorConvPtr = (void*) &tensorConvolution;
+  void* tensorHConvPtr = (void*) &tensorHConvolution;
+  void* tensorPoolPtr = (void*) &tensorPooling;
+  void* tensorLRNPtr = (void*) &tensorLRN;
+  void* tensorGemmPr = (void*) &tensorGemm;
+  void* tensorGemmCPUPtr = (void*) &tensorGemmCPU;
+  void* tensorGemmGPUPtr = (void*) &tensorGemmGPU;
+  void* tensorHgemmPtr = (void*) &tensorHgemm;
+  void* tensorGemmBiasPtr = (void*) &tensorGemmBias;
+  void* tensorAddPtr = (void*) &tensorAdd;
+  void* tensorReluPtr = (void*) &tensorRelu;
+  void* tensorSoftmaxPtr = (void*) &tensorSoftmax;
+  void* tensorAddErrorPtr = (void*) &tensorAddError;
+    
+}
+
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
new file mode 100644
index 0000000000000000000000000000000000000000..527d88c77bdec82e61bc37e411d79ec7485208ca
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
@@ -0,0 +1,142 @@
+
+#include <stdio.h>
+#include <cstdlib>
+#include <cmath>
+#include <memory>
+#include <string>
+
+
+#ifndef CUDNN_HEADER
+#define CUDNN_HEADER
+
+
+extern "C"{
+  /****  Initialization Routine - Must be inserted at program start (in the backend)  ****/
+  void llvm_hpvm_initTensorRt(int gpuid = 0);
+  void llvm_hpvm_cleanupTensorRt();
+
+  // Routine for moving tensor data between the CPU (host) and the GPU
+  void hpvm_request_tensor(void* tensor, int destination);
+
+  /****** Profiling API - defines profiling scope */
+  void startProfiling();
+  void stopProfiling();
+
+  /****** Routines for tensor creation and initialization *******/
+  void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size);
+  void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size);
+
+  // NOTE: Currently only using 4-D tensors - 2D and 3D tensors not supported for cuDNN operations
+  // NOTE: The only data format supported as of now is: CUDNN_NCHW
+  void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size, size_t dim4_size);
+  void initTensorData(void* tensor, void* data_ptr, size_t size_in_bytes);
+
+  void freeTensor(void*);
+
+  /********** Tensor Operation API ******/
+
+  void** tensorSplit(void* tensor, int num_splits, int split_dim);
+  void* tensorConcat(void** tensors, int num_splits, int split_dim);
+
+  // NOTE: For conv_mode, only value '1' is supported
+  void* tensorConvolution(void* input, void* filter,
+			  int vertical_pad, int horizontal_pad,
+			  int vertical_stride, int horizontal_stride,
+			  int conv_mode, int compute_precision);
+  void* tensorHalfConvolution(void* input, void* filter,
+			  int vertical_pad, int horizontal_pad,
+			  int vertical_stride, int horizontal_stride,
+			  int conv_mode, int compute_precision);
+
+  void* tensorPooling(void* input,
+		      int poolFunction,
+		      int window_height, int window_width,
+		      int vertical_pad, int horizontal_pad,
+		      int vertical_stride, int horizontal_stride);
+
+  void* tensorHalfPooling(void* input,
+		       int poolFunction,
+		       int window_height, int window_width,
+		       int vertical_pad, int horizontal_pad,
+		       int vertical_stride, int horizontal_stride);
+
+  
+  void* tensorLRN(void* input, unsigned int LRN_window,
+		  double LRN_alpha, double LRN_beta, double LRN_k);
+
+
+  /* 4 different Gemm versions */
+  void* tensorGemm(void* lhs, void* rhs);
+  void* tensorGemmCPU(void* lhs, void* rhs);
+  void* tensorGemmGPU(void* lhs, void* rhs);
+  void* tensorHalfGemm(void* lhs, void* rhs);
+
+  
+  // NOTE: In-place operation
+  void* tensorGemmBias(void* input, void* bias);
+  // NOTE: In place operation
+  void* tensorAdd(void* x, void* bias);
+  // NOTE: In place operation
+  void* tensorHalfAdd(void* x, void* bias);
+  // NOTE: In-place operation
+  void* tensorRelu(void* input);
+  
+  // NOTE: In-place operation
+  void* tensorTanh(void* input);
+  // NOTE: In-place operation
+  void* tensorHalfTanh(void* input);
+
+  // NOTE: In-place operation
+  void* tensorRelu2(void* input, float min, float max);
+  // NOTE: In-place operation
+  void* tensorHalfRelu2(void* input, float min, float max);
+  // NOTE: In-place operation
+  void* tensorSoftmax(void* input);
+
+  /* Error injection API - used for accuracy tuning */
+  void* tensorAddError(void* x_ptr, int error_scale);  
+}
+
+
+void dumpAccuracyNorms();
+void readOpenTunerFlags(char* file_name);
+void clearOpCounter();
+void clearTensorMap();
+void freeOutputTensors();
+
+
+/*void emptyFunction(){
+
+  void* initRT = (void*) &llvm_hpvm_initTensorRt;
+  void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt;
+  void* request_tensorPtr = (void*) &hpvm_request_tensor;
+  void* startProf = (void*) &startProfiling;
+  void* stopProf = (void*) &stopProfiling;
+  void* create2Dptr = (void*) &create2DTensor;
+  void* create3Dptr = (void*) &create3DTensor;
+  void* create4Dptr = (void*) &create4DTensor;
+  void* initTensorPtr = (void*) &initTensorData;
+  void* tensorSplitPtr = (void*) &tensorSplit;
+  void* tensorConcatPtr = (void*) &tensorConcat;
+  void* tensorConvPtr = (void*) &tensorConvolution;
+  void* tensorHConvPtr = (void*) &tensorHConvolution;
+  void* tensorPoolPtr = (void*) &tensorPooling;
+  void* tensorLRNPtr = (void*) &tensorLRN;
+  void* tensorGemmPr = (void*) &tensorGemm;
+  void* tensorGemmCPUPtr = (void*) &tensorGemmCPU;
+  void* tensorGemmGPUPtr = (void*) &tensorGemmGPU;
+  void* tensorHgemmPtr = (void*) &tensorHgemm;
+  void* tensorGemmBiasPtr = (void*) &tensorGemmBias;
+  void* tensorAddPtr = (void*) &tensorAdd;
+  void* tensorReluPtr = (void*) &tensorRelu;
+  void* tensorSoftmaxPtr = (void*) &tensorSoftmax;
+  void* tensorAddErrorPtr = (void*) &tensorAddError;    
+}
+
+*/
+
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e5db155e1e6ed62beee1caec7002ba4bd099472d
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu
@@ -0,0 +1,385 @@
+
+#ifndef TENSOR_UTILS_HEADER
+#define TENSOR_UTILS_HEADER
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include <ctime>
+#include <cfloat>
+#include <algorithm>
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <random>
+#include <sstream>
+#include <string>
+
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <cublas_api.h>
+#include <vector>
+
+#include "../../dnn_sources/include/types.h"
+#include "tensor_runtime.h"
+#include "debug.h"
+#include "tensor.h"
+#include "global_data.h"
+
+
+// used to map HPVM tensors to runtime tensors (with extra runtime-specific information)
+std::vector<Tensor*> tensorsArr;
+int total_tensors = 0;
+
+
+
+// Registers `tensor` as the runtime counterpart of `hpvm_tensor`: the HPVM tensor is
+// stamped with the next sequential id and the runtime tensor is appended to tensorsArr.
+void addRuntimeTensor(struct Tensor_t* hpvm_tensor, struct Tensor* tensor){
+  hpvm_tensor->tensor_id = total_tensors++;
+  tensorsArr.push_back(tensor);
+}
+
+
+// Looks up the runtime tensor registered for `hpvm_tensor` through its tensor_id.
+// Reports an error (and yields NULL should ERROR return) when the id is out of range.
+struct Tensor* getRuntimeTensor(struct Tensor_t* hpvm_tensor){
+  int tensor_id = hpvm_tensor->tensor_id;
+  if(tensor_id >= 0 && tensor_id < total_tensors) // negative ids used to slip through
+    return tensorsArr[tensor_id];
+  ERROR("Tensor not found in runtime. Aborting ...");
+  return NULL; // every path of a non-void function must return a value
+}
+
+// Releases the GPU and host data buffers owned by a tensor and removes the buffers and
+// the tensor from the runtime's tracking lists (tensors_ptr, host_ptr, obj_ptr), so that
+// freeOutputTensors() will not free them a second time.
+// NOTE: the Tensor object itself is not freed here; only its two data buffers are.
+void freeTensor(void* tensor_ptr){
+
+  if(tensor_ptr == NULL)
+    return; // nothing to release
+
+  Tensor* tensor = (Tensor*) tensor_ptr;
+
+  // erase-remove drops every matching entry; the index loops it replaces skipped the
+  // element that slid into position i after each erase
+  tensors_ptr.erase(std::remove(tensors_ptr.begin(), tensors_ptr.end(), tensor->gpu_data),
+                    tensors_ptr.end());
+  host_ptr.erase(std::remove(host_ptr.begin(), host_ptr.end(), tensor->host_data),
+                 host_ptr.end());
+  obj_ptr.erase(std::remove(obj_ptr.begin(), obj_ptr.end(), tensor_ptr),
+                obj_ptr.end());
+
+  cudaFree(tensor->gpu_data);
+  tensor->gpu_data = NULL;
+  free(tensor->host_data);
+  tensor->host_data = NULL;
+
+}
+
+
+// Maps a cuDNN data-type code to its element width in bytes (float: 4, half: 2).
+// Any other code is logged and treated as 1 byte wide.
+int getTypeSize(int data_type){
+  switch(data_type){
+  case CUDNN_DATA_FLOAT: return 4;
+  case CUDNN_DATA_HALF:  return 2;
+  default:
+    // TODO: Add support for more data types
+    INFO("Given type = %d, expected type = %d \n", data_type, CUDNN_DATA_FLOAT);
+    return 1;
+  }
+}
+
+
+// Stores the tensor's byte footprint: element width of data_type times num_elems.
+void setSizeInBytes(struct Tensor* tensor, int data_type, size_t num_elems){
+  const size_t elem_width = getTypeSize(data_type);
+  tensor->size_in_bytes = elem_width * num_elems;
+}
+
+
+// Allocates host and GPU buffers of the tensor's byte size and tracks them, with the tensor, for later cleanup.
+// NOTE: Will need to extend this function to support other device allocations
+void allocateMem(struct Tensor* tensor, int data_type, size_t num_elems){
+  setSizeInBytes(tensor, data_type, num_elems);
+  tensor->data_type = data_type;
+  tensor->num_elems = num_elems;
+  tensor->host_data = (void*) malloc(tensor->size_in_bytes); // Allocate memory on the host
+  if(tensor->host_data == NULL && tensor->size_in_bytes != 0)
+    ERROR("Failed to allocate host memory for tensor");
+  tensor->data_placement = HOST; // By default data is on the host
+  checkCudaErrors(cudaMalloc(&tensor->gpu_data, tensor->size_in_bytes)); // Allocate memory on GPU
+  tensors_ptr.push_back(tensor->gpu_data);
+  host_ptr.push_back(tensor->host_data);
+  obj_ptr.push_back(tensor);
+}
+
+// Translates the runtime's format code (0 -> NCHW, 1 -> NHWC) into the cuDNN enum
+// value and stores it on the tensor; any other code is stored unchanged.
+void setCudnnDataFormat(struct Tensor* tensor, int data_format){
+
+  if(data_format == 0){
+    data_format = CUDNN_TENSOR_NCHW;
+  }
+  else if(data_format == 1){
+    data_format = CUDNN_TENSOR_NHWC;
+  }
+  // every other code passes through untouched
+
+  tensor->data_format = data_format;
+  DEBUG("tensor->data_format = %d \n", tensor->data_format);
+}
+
+
+// Stores the cuDNN layout on the tensor (via setCudnnDataFormat), then creates
+// tensor->filter_desc and sets it up as a 4-D filter descriptor with the given sizes.
+void set4DFilterDescriptor(struct Tensor* tensor, int data_format, size_t dim1_size,
+			   size_t dim2_size, size_t dim3_size, size_t dim4_size){
+  setCudnnDataFormat(tensor, data_format);
+  
+  checkCUDNN(cudnnCreateFilterDescriptor(&tensor->filter_desc));
+  checkCUDNN(cudnnSetFilter4dDescriptor(tensor->filter_desc,
+					(cudnnDataType_t) tensor->data_type,
+					(cudnnTensorFormat_t) tensor->data_format,
+					dim1_size,
+					dim2_size, 
+					dim3_size,
+					dim4_size));  
+}
+
+
+
+// Stores the cuDNN layout on the tensor, creates tensor->tensor_desc and configures it as a
+// 4-D descriptor with the given sizes; the resulting strides are read back and logged.
+void set4DTensorDescriptor(struct Tensor* tensor, int data_format, size_t dim1_size,
+			   size_t dim2_size, size_t dim3_size, size_t dim4_size){
+  setCudnnDataFormat(tensor, data_format);
+  checkCUDNN(cudnnCreateTensorDescriptor(&tensor->tensor_desc));
+
+  // For certain operations, the strides may need to change - in which case the descriptor
+  // needs to be reinitialized
+  // FIXIT: Only specific to floats - make generic and test
+  checkCUDNN(cudnnSetTensor4dDescriptor(tensor->tensor_desc,
+					(cudnnTensorFormat_t) tensor->data_format, // Data format
+					(cudnnDataType_t) tensor->data_type, // Data type
+					dim1_size, dim2_size,
+					dim3_size, dim4_size));
+
+  cudnnDataType_t dType;
+  int nStride, cStride, hStride, wStride;
+  int size1, size2, size3, size4;
+  checkCUDNN(cudnnGetTensor4dDescriptor(tensor->tensor_desc,
+					&dType,
+					&size1, &size2, &size3, &size4,
+					&nStride, &cStride, &hStride, &wStride));
+
+  INFO("nStride = %d, cStride = %d, hStride = %d, wStride = %d \n",
+  	 nStride, cStride, hStride, wStride);
+}
+
+
+// FIXIT: Striding still not working - hence 2D and 3D tensor support is missing
+// Creates tensor->tensor_desc and configures it as an N-d descriptor whose strides are
+// computed from dim_sizes for a densely packed layout (last dimension has stride 1).
+void setTensorDescriptor(struct Tensor* tensor, int num_dims,
+			 size_t* dim_sizes){
+  if(num_dims < 1){
+    ERROR("setTensorDescriptor requires at least one dimension");
+    return; // reached only if ERROR returns; avoids writing strides[-1]
+  }
+  checkCUDNN(cudnnCreateTensorDescriptor(&tensor->tensor_desc));
+
+  // std::vector frees these scratch arrays on return; the malloc'd ones were never freed
+  std::vector<int> strides(num_dims);
+  strides[num_dims - 1] = 1;
+  for(int i = num_dims - 2; i >= 0; i--){
+    strides[i] = strides[i+1] * dim_sizes[i+1];
+  }
+  for(int i = 0; i < num_dims; i++){
+    INFO("strides[%d] = %d \n", i, strides[i]);
+  }
+  std::vector<int> const_dims(num_dims);
+  for(int j = 0 ; j < num_dims; j++){
+    const_dims[j] = (int) dim_sizes[j];
+    INFO("const_dim = %d \n", const_dims[j]);
+  }
+  INFO("data_type = %d, cuDNN_value = %d \n", tensor->data_type, CUDNN_DATA_FLOAT); 
+  // For certain operations, the strides may need to change - in which case the descriptor
+  // needs to be reinitialized
+  checkCUDNN(cudnnSetTensorNdDescriptor(tensor->tensor_desc,
+					(cudnnDataType_t) tensor->data_type, // Data type
+					num_dims,
+					const_dims.data(),
+					strides.data()));
+}
+
+extern "C"{
+
+  // Builds a 2-D tensor: allocates the Tensor object, host/GPU storage for
+  // dim1_size x dim2_size elements, and records the shape. No cuDNN descriptor is set.
+  void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size){
+    struct Tensor* result = (struct Tensor*) malloc(sizeof(Tensor));
+    allocateMem(result, data_type, dim1_size * dim2_size);
+    // Record the shape on the tensor
+    result->dims.num_dims = 2;
+    result->dims.dim_sizes = (size_t*) malloc(sizeof(size_t) * 2);
+    result->dims.dim_sizes[0] = dim1_size;
+    result->dims.dim_sizes[1] = dim2_size;
+
+    return result;
+  }
+
+
+  // Builds a 3-D tensor: allocates the Tensor object, host/GPU storage for the product
+  // of the three sizes, and records the shape. No cuDNN descriptor is set.
+  void* create3DTensor(int data_type, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size){
+    struct Tensor* result = (struct Tensor*) malloc(sizeof(Tensor));
+    allocateMem(result, data_type, dim1_size * dim2_size * dim3_size);
+    // Record the shape on the tensor
+    result->dims.num_dims = 3;
+    result->dims.dim_sizes = (size_t*) malloc(sizeof(size_t) * 3);
+    result->dims.dim_sizes[0] = dim1_size;
+    result->dims.dim_sizes[1] = dim2_size;
+    result->dims.dim_sizes[2] = dim3_size;
+
+    return result;
+  }
+
+
+  // Builds a 4-D tensor: allocates the Tensor object and its host/GPU storage, records
+  // the shape, and sets up both a cuDNN tensor descriptor and a filter descriptor for it.
+  void* create4DTensor(int data_type, int data_format, size_t dim1_size, size_t dim2_size,
+		       size_t dim3_size, size_t dim4_size){
+    struct Tensor* result = (struct Tensor*) malloc(sizeof(Tensor));
+    allocateMem(result, data_type, dim1_size * dim2_size * dim3_size * dim4_size);
+    // Record the shape on the tensor
+    result->dims.num_dims = 4;
+    result->dims.dim_sizes = (size_t*) malloc(sizeof(size_t) * 4);
+    result->dims.dim_sizes[0] = dim1_size;
+    result->dims.dim_sizes[1] = dim2_size;
+    result->dims.dim_sizes[2] = dim3_size;
+    result->dims.dim_sizes[3] = dim4_size;
+    // Shape is in place; now attach the cuDNN descriptors
+    set4DTensorDescriptor(result, data_format, dim1_size, dim2_size, dim3_size, dim4_size);
+    // FIXIT: filter descriptor should be invoked only for filters
+    set4DFilterDescriptor(result, data_format, dim1_size, dim2_size, dim3_size, dim4_size);
+
+
+    return result;
+  }
+
+
+  // Copies size_in_bytes bytes from data_ptr into the tensor's host buffer.
+  // On a size mismatch the error is reported and the copy is skipped, so host_data is never overrun.
+  void initTensorData(void* tensor_ptr, void* data_ptr, size_t size_in_bytes){
+    Tensor* tensor = (Tensor*) tensor_ptr;
+    if(tensor->size_in_bytes != size_in_bytes){
+      ERROR("The destination and source sizes don't match");
+      return; // reached only if ERROR returns
+    }
+    std::memcpy(tensor->host_data, data_ptr, size_in_bytes);
+  }
+
+		      
+
+  // Copies the host buffer to the GPU buffer unless the tensor is already marked DEVICE, then
+  // marks it DEVICE. The copy's status goes through checkCudaErrors instead of being discarded.
+  void hostToDeviceCopy(struct Tensor* tensor){
+    if(tensor->data_placement != DEVICE){
+      checkCudaErrors(cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes,
+                                 cudaMemcpyHostToDevice));
+      INFO("Moving %zu bytes from host to GPU \n", (size_t) tensor->size_in_bytes);
+      tensor->data_placement = DEVICE;
+    }
+    else{
+      DEBUG("No data movement required - Data on Device \n");    
+    }
+  }
+
+
+  // Copies the GPU buffer to the host buffer unless the tensor is already marked HOST, then
+  // marks it HOST. The copy's status goes through checkCudaErrors instead of being discarded.
+  void deviceToHostCopy(struct Tensor* tensor){
+    if(tensor->data_placement != HOST){
+      checkCudaErrors(cudaMemcpy(tensor->host_data, tensor->gpu_data, tensor->size_in_bytes,
+                                 cudaMemcpyDeviceToHost));
+      INFO("Moving %zu bytes from GPU to host \n", (size_t) tensor->size_in_bytes);
+      tensor->data_placement = HOST;
+    }
+    else{
+      DEBUG("No data movement required - Data on Host \n");    
+    }
+  }
+
+
+  // Copies srcTensor's bytes into dstTensor on whichever side (host or GPU) the source
+  // currently lives, and gives dstTensor the same placement. The GPU copy is error-checked.
+  void tensorCopy(struct Tensor* srcTensor, struct Tensor* dstTensor){
+    if(srcTensor->data_placement == HOST){
+      memcpy(dstTensor->host_data, srcTensor->host_data, srcTensor->size_in_bytes);  
+      INFO("Moving %zu bytes from host to host \n", (size_t) srcTensor->size_in_bytes);
+      dstTensor->data_placement = HOST;
+    }
+    else if (srcTensor->data_placement == DEVICE){
+      checkCudaErrors(cudaMemcpy(dstTensor->gpu_data, srcTensor->gpu_data, srcTensor->size_in_bytes,
+                                 cudaMemcpyDeviceToDevice));
+      INFO("Moving %zu bytes from GPU to GPU \n", (size_t) srcTensor->size_in_bytes);
+      dstTensor->data_placement = DEVICE;
+    }
+  }
+
+
+  // Moves the tensor's data to the requested side if it is not already there:
+  // destination 0 = host, 1 = GPU. Any other destination value is ignored.
+  // Both copies are error-checked rather than having their status discarded.
+  void hpvm_request_tensor(void* tensor_ptr, int destination){
+    Tensor* tensor = (Tensor*) tensor_ptr;
+    // If destination is the host
+    if(destination == 0){  
+      if(tensor->data_placement != HOST){
+        checkCudaErrors(cudaMemcpy(tensor->host_data, tensor->gpu_data, tensor->size_in_bytes,
+                                   cudaMemcpyDeviceToHost));
+        DEBUG("Moving %zu bytes from GPU to host \n", (size_t) tensor->size_in_bytes);
+        tensor->data_placement = HOST;
+      }
+      else{
+        DEBUG("No data movement required - Data on Host \n");    
+      }
+    }
+    // If destination is the GPU
+    else if(destination == 1){
+      if(tensor->data_placement != DEVICE){
+        checkCudaErrors(cudaMemcpy(tensor->gpu_data, tensor->host_data, tensor->size_in_bytes,
+                                   cudaMemcpyHostToDevice));
+        INFO("Moving %zu bytes from host to GPU \n", (size_t) tensor->size_in_bytes);
+        tensor->data_placement = DEVICE;
+      }
+      else{
+        DEBUG("No data movement required - Data on Device \n");    
+      }    
+    }
+  }
+
+}
+
+// Overwrites the tensor's recorded data placement; no data is copied. Called from within
+// the runtime to switch an output tensor's placement from host to device.
+void changeTensorPlacement(struct Tensor* tensor, data_location_t data_placement){
+  tensor->data_placement = data_placement;
+}
+
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c39f844c4e87662258f8a564cadbb56962cd14b3
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
@@ -0,0 +1,999 @@
+/* This file includes the API implementation of the HPVM tensor runtime built on cublas, cudnn
+**
+**  Author: Hashim Sharif
+**  Email: hsharif3@illinois.edu
+*/
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+#include <ctime>
+#include <cfloat>
+#include <algorithm>
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <cublas_api.h>
+#include <cuda_fp16.h>
+#include <driver_types.h>
+
+
+// Tensor runtime header files
+#include "../include/tensor_runtime.h"
+#include "../include/tensor_utils.cu"
+#include "../include/debug.h"
+#include "../include/profiling.h"
+#include "../include/fp16_conversion.h"
+#include "../include/global_data.h"
+#include "../include/error.h"
+#include "../include/tensor.h"
+#include "../include/op_overheads.h"
+#include "../include/half_precision_api.h"
+
+
+
+//** TODOs:
+//   1) Add support for datatypes beyond floats - need to fix hardcoded CUDNN_DATA_FLOAT inputs
+//   2) Add a larger set of operations 
+
+
+
+// Selects GPU `gpuid` and creates the runtime's cuBLAS and cuDNN handles. When built with
+// ERROR_INJECTION_ENABLED it additionally calls readOpenTunerFlags("opentuner_flags").
+void llvm_hpvm_initTensorRt(int gpuid){
+  // NOTE: Setting the target GPU. Can we use multiple GPUs?
+  checkCudaErrors(cudaSetDevice(gpuid));
+  // Initializing cuDNN and cuBlas handles
+  checkCudaErrors(cublasCreate(&cublasHandle));
+  checkCUDNN(cudnnCreate(&cudnnHandle));
+
+  #ifdef ERROR_INJECTION_ENABLED
+  readOpenTunerFlags("opentuner_flags");
+  #endif
+}
+
+
+void llvm_hpvm_cleanupTensorRt(){
+  // Currently a no-op: nothing created by llvm_hpvm_initTensorRt is released here.
+}
+
+// Calls dump_result("accuracy_summary") when built with ERROR_INJECTION_ENABLED;
+// otherwise this function does nothing.
+void dumpAccuracyNorms(){
+
+  #ifdef ERROR_INJECTION_ENABLED
+  dump_result("accuracy_summary");
+  #endif
+}
+
+
+// Asks the CUDA runtime how many devices it reports and returns that count
+int getGPUCount(){
+  int device_count;
+  checkCudaErrors(cudaGetDeviceCount(&device_count));
+  return device_count;
+}
+
+
+
+// Empties the three tracking lists without freeing anything their entries point to.
+void clearTensorMap(){
+  obj_ptr.clear();
+  host_ptr.clear();
+  tensors_ptr.clear();
+}
+
+
+// Frees every GPU buffer, host buffer and tensor object still tracked by the runtime.
+// Each list is emptied once its entries are freed, so calling this again cannot
+// free the same pointers twice.
+void freeOutputTensors(){
+  for(size_t i = 0; i < tensors_ptr.size(); i++){
+    cudaFree(tensors_ptr[i]);
+  }
+  tensors_ptr.clear();
+
+  for(size_t i = 0; i < host_ptr.size(); i++){
+    free(host_ptr[i]);
+  }
+  host_ptr.clear();
+
+  for(size_t i = 0; i < obj_ptr.size(); i++){
+    free(obj_ptr[i]);
+  }
+  obj_ptr.clear();
+}
+
+
+
+// Resets the per-operation bookkeeping: zeroes total_ops and op_counter and empties op_accuracies.
+void clearOpCounter(){
+  total_ops = 0;
+  op_counter = 0;
+  op_accuracies.clear();
+}
+
+
+
+
+
+// FIXIT: Fix any assumptions on the NCHW format
+// TODO: benchmark split performance and check if it is prohibitively high?
+// Splits a 4-D tensor into num_splits equally sized 4-D tensors along split_dim (on the host).
+// Returns a malloc'd array of num_splits tensors, or NULL after reporting invalid arguments.
+void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){
+  INFO("*** TensorSplit \n");  
+  Tensor* tensor = (Tensor*) tensor_ptr;
+  // Guard against division by zero and against indexing outside the 4 dims used below
+  if(num_splits < 1 || split_dim < 0 || split_dim > 3 || tensor->dims.num_dims != 4){
+    ERROR("Invalid arguments to tensorSplit");
+    return NULL;
+  }
+  profileEvent("tensorSplit");
+  deviceToHostCopy(tensor); // Splitting done on the host
+
+  Tensor** splits = (Tensor**) malloc(sizeof(Tensor*) * num_splits);
+  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensor->dims.num_dims);
+  for(unsigned int i = 0; i < tensor->dims.num_dims; i++){
+    dim_sizes[i] = tensor->dims.dim_sizes[i];
+  }
+
+  dim_sizes[split_dim] = tensor->dims.dim_sizes[split_dim] / num_splits;
+  if(dim_sizes[split_dim] < 1)
+    ERROR("Split Dimension < 1 after splitting");
+
+  size_t copy_size = getTypeSize(tensor->data_type);
+  for(unsigned int i = split_dim; i < tensor->dims.num_dims; i++){
+    copy_size = copy_size * dim_sizes[i];
+  }
+  
+  for(unsigned int i = 0; i < num_splits; i++){
+    // FIXIT: Don't be specific to 4D tensors
+    // NOTE: Using same data format (NHWC/NCHW) for the split tensors
+    INFO("dim_sizes[0] = %zu, dim_sizes[1] = %zu, dim_sizes[2] = %zu, dim_sizes[3] = %zu \n",
+	 dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);
+    Tensor* split = (Tensor*) create4DTensor(tensor->data_type, tensor->data_format,
+					  dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);
+    size_t copy_start = i * copy_size;
+    size_t copy_stride = num_splits * copy_size;
+    INFO("copy_size = %zu, copy_start = %zu, copy_stride = %zu, tensor->size_in_bytes = %zu \n",
+	 copy_size, copy_start, copy_stride, (size_t) tensor->size_in_bytes);
+
+    int index = 0;
+    while(copy_start + copy_size <= tensor->size_in_bytes){
+      memcpy(((char*) split->host_data + (index * copy_size)),
+	     ((char*)tensor->host_data + copy_start),
+	     copy_size);
+      copy_start += copy_stride;
+      index++;
+    }
+    splits[i] = split;     
+  }
+
+  free(dim_sizes); // scratch copy of the shape; it used to leak on every call
+  profileEvent("tensorSplit_end", true);
+  return (void**) splits;
+}
+
+
+// Concatenates num_splits equally shaped 4-D tensors along split_dim into one newly created
+// 4-D tensor; the copying is done on the host. Returns the new tensor, or NULL after
+// reporting invalid arguments.
+void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){
+  INFO("*** TensorConcat \n");  
+  Tensor** tensors = (Tensor**) tensors_ptr;
+  // Guard against an empty input list and against indexing outside the 4 dims used below
+  if(tensors == NULL || num_splits < 1 || split_dim < 0 || split_dim > 3 || tensors[0]->dims.num_dims != 4){
+    ERROR("Invalid arguments to tensorConcat");
+    return NULL;
+  }
+  profileEvent("tensorConcat");
+  for(int i = 0; i < num_splits; i++){
+    deviceToHostCopy(tensors[i]); // Concatenation done on the host
+  }
+  
+  // The no of dimensions of concatenated tensor are the same
+  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensors[0]->dims.num_dims);
+  for(unsigned int i = 0; i < tensors[0]->dims.num_dims; i++){
+    dim_sizes[i] = tensors[0]->dims.dim_sizes[i];
+  }
+  
+  size_t copy_size = getTypeSize(tensors[0]->data_type);
+  for(unsigned int i = split_dim; i < tensors[0]->dims.num_dims; i++){
+    copy_size = copy_size * dim_sizes[i];
+  }
+
+  dim_sizes[split_dim] = dim_sizes[split_dim] * num_splits;
+  if(dim_sizes[split_dim] < 1)
+    ERROR("Split Dimension < 1 after concat");
+
+  Tensor* output = (Tensor*) create4DTensor(tensors[0]->data_type, tensors[0]->data_format,
+					 dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);
+  INFO("dim_sizes[0] = %zu, dim_sizes[1] = %zu, dim_sizes[2] = %zu, dim_sizes[3] = %zu \n",
+       dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]);
+
+  int num_copies = 1;
+  for(unsigned int i = 0; i < split_dim; i++){
+    num_copies = num_copies * dim_sizes[i];
+  }
+  
+  size_t copy_stride = num_splits * copy_size;
+  INFO("copy_size = %zu, num_copies = %d, copy_stride = %zu, output->size_in_bytes = %zu \n",
+       copy_size, num_copies, copy_stride, (size_t) output->size_in_bytes);
+  for(unsigned int i = 0; i < num_copies; i++){
+    // FIXIT: Don't be specific to 4D tensors
+    size_t copy_start = i * copy_stride;
+    for(int j = 0; j < num_splits; j++){
+      struct Tensor* split = tensors[j];
+      memcpy(((char*) output->host_data + copy_start + (j * copy_size)),
+	     ((char*) split->host_data + (i * copy_size)),
+	     copy_size);   
+    }      
+  }
+  free(dim_sizes); // scratch copy of the shape; it used to leak on every call
+  profileEvent("tensorConcat_end", true);
+  return output;
+}
+
+
+
+// Applies cuDNN cross-channel local response normalization to `input` and returns a newly
+// created NCHW output tensor of the same shape, marked as resident on the device.
+// The LRN descriptor created for the call is destroyed before returning.
+void* tensorLRN(void* input_ptr, unsigned int LRN_window,
+		double LRN_alpha, double LRN_beta, double LRN_k){
+  INFO("*** TensorLRN \n");  
+  profileEvent("tensorLRN");
+
+  Tensor* input = (Tensor*) input_ptr;
+  hostToDeviceCopy(input);
+  float alpha = 1.0f, beta = 0.0f;
+  cudnnLRNDescriptor_t LRNDesc;
+  checkCUDNN(cudnnCreateLRNDescriptor(&LRNDesc));
+
+  INFO("window = %d, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n",
+       LRN_window, LRN_alpha, LRN_beta, LRN_k);
+ 
+  checkCUDNN(cudnnSetLRNDescriptor(LRNDesc, LRN_window, LRN_alpha, LRN_beta, LRN_k));
+
+  size_t* dim_sizes = input->dims.dim_sizes;
+  Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
+			  CUDNN_TENSOR_NCHW, dim_sizes[0], dim_sizes[1],
+			  dim_sizes[2], dim_sizes[3]);
+  // NOTE: Changing output tensor placement from host to device
+  changeTensorPlacement(output, DEVICE); 
+  // NOTE: Necessary to insert the above call for every output tensor
+
+  printTensorDescInfo(input);
+  printTensorDescInfo(output);
+  
+  checkCUDNN(cudnnLRNCrossChannelForward(cudnnHandle, LRNDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
+					 &alpha, input->tensor_desc, input->gpu_data,
+					 &beta, output->tensor_desc, output->gpu_data));
+
+  profileEvent("tensorLRN_end", true);
+  checkCUDNN(cudnnDestroyLRNDescriptor(LRNDesc)); // per-call descriptor; previously leaked
+    
+  return output;
+}
+
+
+
+
+// FIXIT: Apparently this is not working for 3D tensors or dimensions other than 4D
+// Perhaps 3D, 2D tensors can be remapped to 4D tensors to make this work?
+// Adds `bias` into `x` in place on the GPU via cudnnAddTensor (both scaling factors are
+// 1.0) and returns x. With ERROR_INJECTION_ENABLED it then injects error into x.
+void* tensorAdd(void* x_ptr, void* bias_ptr){
+
+  Tensor* x = (Tensor*) x_ptr;
+  Tensor* bias = (Tensor*) bias_ptr;
+  
+  INFO("*** TensorAdd \n");  
+  profileEvent("tensorAdd");
+    
+  float alpha = 1.0f; // used for both the bias and the destination scaling factor
+  hostToDeviceCopy(x);
+  hostToDeviceCopy(bias);
+    
+  // FIXIT: routine fails for 3D tensors
+  checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
+			    bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));
+
+  profileEvent("tensorAdd_end", true);
+
+  #ifdef ERROR_INJECTION_ENABLED  
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+  
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(x, op_acc);
+  add_norms(error_norms, "tensorAdd", op_acc);
+  add_bias_overheads(x, op_acc);
+  op_counter++;
+  
+  #endif
+  
+  return x;
+}
+
+
+// FIXIT: Generalize all of the routines for types {half, float, double}
+// Runs a cuDNN forward convolution of `input` with `filter` and returns a newly created
+// output tensor marked as resident on the device. conv_mode 0 selects CUDNN_CONVOLUTION,
+// anything else CUDNN_CROSS_CORRELATION; compute_precision 1 selects half, otherwise float.
+void* tensorConvolution(void* input_ptr, void* filter_ptr,
+			int vertical_pad, int horizontal_pad,
+			int vertical_stride, int horizontal_stride,
+			int conv_mode, int compute_precision){
+
+  INFO("*** TensorConvolution \n");
+  profileEvent("tensorConv");
+
+  Tensor* input = (Tensor*) input_ptr;
+  Tensor* filter = (Tensor*) filter_ptr;
+  
+  cudnnConvolutionDescriptor_t convDesc;
+  cudnnConvolutionFwdAlgo_t convAlgo;
+  // Default to the one mode the API header documents as supported, so an unexpected
+  // conv_mode no longer leaves `mode` uninitialized
+  cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
+  if(conv_mode == 0)
+    mode = CUDNN_CONVOLUTION;
+      
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f, beta = 0.0f;
+
+  cudnnDataType_t computeType = CUDNN_DATA_FLOAT;
+  if(compute_precision == 0){
+    computeType = CUDNN_DATA_FLOAT;
+  }
+
+  if(compute_precision == 1){
+    computeType = CUDNN_DATA_HALF;
+  }
+
+  // TODO: Support other cases;  
+  hostToDeviceCopy(input);
+  hostToDeviceCopy(filter);
+      
+  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
+  // FIXIT: Think if upscaling values need to be configurable?
+  // IMP-FIXIT:  CUDNN Cross correlation is only used in the Lenet context
+  // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used?
+  checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc,
+					     vertical_pad, horizontal_pad, // conv padding
+					     vertical_stride, horizontal_stride, // conv strides
+					     1, 1, // upscaling values
+					     mode , // mode is configurable
+                                             computeType)); // defines compute precision
+
+  int n, c, h, w; // output dimensions  
+  // Find dimension of convolution output
+  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc,
+						   input->tensor_desc,
+						   filter->filter_desc,
+						   &n, &c, &h, &w));
+
+  DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
+
+  Tensor* output;
+  if(input->data_format == CUDNN_TENSOR_NCHW)
+    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
+			              CUDNN_TENSOR_NCHW, n, c, h, w);
+  else if(input->data_format == CUDNN_TENSOR_NHWC){
+    DEBUG("* NHWC Format \n");
+    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
+			              CUDNN_TENSOR_NHWC, n, h, w, c);
+  }
+  else
+    ERROR("Unsupported Tensor Type");
+
+  // NOTE: Changing output tensor placement from host to device
+  changeTensorPlacement(output, DEVICE); 
+  // NOTE: Necessary to insert the above call for every output tensor
+    
+  DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %zu, H = %zu, W = %zu, C = %zu \n",
+	output->data_type, output->data_format, output->dims.dim_sizes[0], output->dims.dim_sizes[1],
+	output->dims.dim_sizes[2], output->dims.dim_sizes[3]);
+
+  if(convDesc == NULL || input->tensor_desc == NULL ||
+     filter->filter_desc == NULL || output->tensor_desc == NULL)
+    ERROR("NULL descriptor! \n");
+
+  // Debugging info prints
+  printTensorDescInfo(input);
+  printTensorDescInfo(filter);
+  printTensorDescInfo(output);
+
+  // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking
+  checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle,
+						 input->tensor_desc,
+						 filter->filter_desc,
+						 convDesc,
+						 output->tensor_desc,
+						 CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,	 
+						 //CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
+						 0,
+						 &convAlgo));
+
+  DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", convAlgo,
+	 CUDNN_CONVOLUTION_FWD_ALGO_FFT, CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
+	 CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD);
+
+  // FIXIT: Algo shouldn't be hardcoded
+  convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+
+  size_t workspace_size;
+  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle,
+						     input->tensor_desc,
+						     filter->filter_desc,
+						     convDesc,
+						     output->tensor_desc,
+						     convAlgo,
+						     &workspace_size));
+
+  // Allocating memory for the convolution workspace
+  void* workspace;
+  checkCudaErrors(cudaMalloc(&workspace, workspace_size)); 
+  DEBUG("workspace size = %zu \n", workspace_size);
+
+  checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc,
+				     input->gpu_data, filter->filter_desc, filter->gpu_data,
+				     convDesc, convAlgo, workspace, workspace_size,
+				     &beta, output->tensor_desc, output->gpu_data));
+
+  profileEvent("tensorConv_end", true);
+
+  // Release the per-call workspace and descriptor; both used to leak on every convolution
+  checkCudaErrors(cudaFree(workspace));
+  checkCUDNN(cudnnDestroyConvolutionDescriptor(convDesc));
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+  
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(output, op_acc);
+  add_norms(error_norms, "tensorConv", op_acc);
+  add_conv_overheads(input, filter, vertical_stride, horizontal_stride, op_acc);
+
+  op_counter++;
+  
+  #endif
+  
+  return output;
+}
+
+
+
+
+
+
+// FIXIT: Currently this only computes MAX pooling
+// FIXIT: Add support for Average Pooling
+// Applies cuDNN max pooling to `input` and returns a newly created float NCHW output tensor
+// marked as resident on the device. poolFunction is currently not consulted.
+// The pooling descriptor created for the call is destroyed before returning.
+void* tensorPooling(void* input_ptr,
+		    int poolFunction,
+		    int window_height, int window_width,
+		    int vertical_pad, int horizontal_pad,
+		    int vertical_stride, int horizontal_stride){
+
+  INFO("*** TensorPooling \n");
+  profileEvent("tensorPooling");
+
+  Tensor* input = (Tensor*) input_ptr;
+  cudnnPoolingDescriptor_t poolDesc;
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f, beta = 0.0f;
+
+  hostToDeviceCopy(input);
+  checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc));            
+
+  int n = input->dims.dim_sizes[0];
+  int c = input->dims.dim_sizes[1];
+  int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride;
+  h = h + 1;
+  int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride;
+  w = w + 1;
+
+  DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
+  
+  // FIXIT: Don't be specific to floats
+  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w);
+  // Changing output tensor placement from host to device
+  changeTensorPlacement(output, DEVICE); 
+
+  // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format
+  // FIXIT: Is this setTensor even needed?
+  checkCUDNN(cudnnSetTensor4dDescriptor(output->tensor_desc,
+					CUDNN_TENSOR_NCHW,
+					CUDNN_DATA_FLOAT,
+					n, c,
+					h, w));
+
+  // FIXIT: Make the pool function (max, min, avg) configurable
+  checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc,
+					 CUDNN_POOLING_MAX,
+					 CUDNN_PROPAGATE_NAN,
+					 window_height, window_width,
+					 vertical_pad, horizontal_pad,
+					 vertical_stride, horizontal_stride));
+     
+  checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input->tensor_desc,
+				 input->gpu_data, &beta, output->tensor_desc, output->gpu_data));
+
+  profileEvent("tensorPooling_end", true);
+  // Release the per-call descriptor; it used to leak on every pooling operation
+  checkCUDNN(cudnnDestroyPoolingDescriptor(poolDesc));
+
+  #ifdef ERROR_INJECTION_ENABLED
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+  
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(output, op_acc);
+  add_norms(error_norms, "tensorPooling", op_acc);
+  add_pool_overheads(input, window_height, vertical_stride, op_acc);
+
+  op_counter++;
+  
+  #endif
+
+  return output;
+}
+
+
+
+
+// Host-side reference matrix multiply: output = lhs * rhs.
+//
+// lhs is viewed as an (m x k) row-major matrix, where m is its first dimension
+// and k is the product of all remaining dimensions. rhs is indexed as a
+// (k x n) row-major matrix, where n is its last dimension.
+//
+// Parameters:
+//   lhs_ptr - Tensor* passed as void*; host_data is read as float
+//   rhs_ptr - Tensor* passed as void*; host_data is read as float
+// Returns: a new Tensor* (as void*) of shape (m, n, 1, 1) whose host_data holds the product.
+void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){
+
+  INFO("*** TensorGemmCPU \n");
+
+  Tensor* lhs = (Tensor*) lhs_ptr;
+  Tensor* rhs = (Tensor*) rhs_ptr;
+
+  // The operation is done on the CPU
+  deviceToHostCopy(lhs);
+  deviceToHostCopy(rhs);
+
+  if(lhs->data_type != CUDNN_DATA_FLOAT){
+    ERROR("Currently only Floating point is supported ");
+  }
+
+  profileEvent("tensorGemmCPU");
+
+  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
+  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
+
+  // 'm' holds the batch dimension - assuming NCHW format Tensors
+  int m = lhs->dims.dim_sizes[0];
+  // The rhs must be a 2D tensor
+  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons
+  int k = 1;
+  // Flattening the dimensions after the batch dimension
+  // NOTE: Allowing any number of dimensions > 2 for lhs
+  for (int j = 1 ; j < lhs->dims.num_dims; j++){
+    k = k * lhs->dims.dim_sizes[j]; // input neurons
+  }
+
+  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
+  // Dimension-note: Check if k is same across the two tensors
+  INFO("m = %d, n = %d, k = %d \n", m, n, k);
+  if(rhs_k != k){
+    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
+  }
+
+  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
+  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
+  // The result is written through host_data, so mark the output as host-resident
+  changeTensorPlacement(output, HOST);
+
+  float* lhs_arr = (float*) lhs->host_data;
+  float* rhs_arr = (float*) rhs->host_data;
+  float* output_arr = (float*) output->host_data;
+
+  // Triple loop: output[i][j] = sum over l of lhs[i][l] * rhs[l][j]
+  for(int i = 0; i < m; i++){
+    for(int j = 0; j < n; j++){
+      float sum = 0.0;
+      for(int l = 0; l < k; l++){
+        float mul = lhs_arr[i*k+l] * rhs_arr[l*n+j];
+        sum = sum + mul;
+      }
+      output_arr[i*n+j] = sum;
+    }
+  }
+
+
+  profileEvent("tensorGemmCPU_end", true);
+
+  return output;
+}
+
+
+
+// GPU matrix multiply via cuBLAS: output = lhs * rhs, with error-injection hooks.
+//
+// lhs is flattened to (rows x inner): rows is its first dimension, inner is the
+// product of the remaining ones. cols is the last dimension of rhs.
+// Returns a new Tensor* (as void*) of shape (rows, cols, 1, 1) placed on DEVICE.
+//
+// Reference: https://gist.github.com/peterwittek/6303527
+void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){
+
+  INFO("*** TensorGemmGPU \n");
+  profileEvent("tensorGemmGPU");
+
+  Tensor* left = (Tensor*) lhs_ptr;
+  Tensor* right = (Tensor*) rhs_ptr;
+
+  INFO("rhs->dims.num_dims = %d \n", right->dims.num_dims);
+  INFO("lhs->dims.num_dims = %d \n", left->dims.num_dims);
+
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f;
+  float beta = 0.0f;
+
+  // Batch dimension - assuming NCHW format Tensors
+  int rows = left->dims.dim_sizes[0];
+  // Output neurons: the last dimension of rhs
+  int cols = right->dims.dim_sizes[right->dims.num_dims-1];
+
+  // Input neurons: every lhs dimension after the batch one, flattened
+  // NOTE: Allowing any number of dimensions > 2 for lhs
+  int inner = 1;
+  for (int d = 1; d < left->dims.num_dims; d++){
+    inner *= left->dims.dim_sizes[d];
+  }
+
+  // Dimension-note: the second-to-last rhs dimension must match the flattened lhs width
+  int right_inner = right->dims.dim_sizes[right->dims.num_dims-2];
+  INFO("m = %d, n = %d, k = %d \n", rows, cols, inner);
+  if(right_inner != inner){
+    ERROR("rhs=%d and lhs=%d columns/rows don't match", right_inner, inner);
+  }
+
+  // NOTE: A 4D tensor keeps the result compatible with later called cuDNN routines
+  Tensor* result = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, rows, cols, 1, 1);
+  changeTensorPlacement(result, DEVICE);
+
+  hostToDeviceCopy(left);
+  hostToDeviceCopy(right);
+
+  // INFO: cuBlas uses column-major format
+  // INFO: The leading dimension is just the FIRST Dimension
+  // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects
+  float* right_data = (float*) right->gpu_data;
+  float* left_data = (float*) left->gpu_data;
+  float* result_data = (float*) result->gpu_data;
+  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
+                              cols, rows, inner,
+                              &alpha,
+                              right_data, cols,
+                              left_data, inner,
+                              &beta,
+                              result_data, cols));
+
+  profileEvent("tensorGemmGPU_end", true);
+
+
+ #ifdef ERROR_INJECTION_ENABLED
+
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+
+  // Apply tensorAddError to the result and record norms / GEMM overheads for this op
+  int acc_level = op_accuracies[op_counter];
+  void* norms = tensorAddError(result, acc_level);
+  add_norms(norms, "tensorGemm", acc_level);
+  add_gemm_overheads(lhs_ptr, rhs_ptr, acc_level);
+
+  op_counter++;
+
+  #endif
+
+
+  return result;
+}
+
+
+
+
+
+
+
+// cuBLAS matrix multiply that passes rhs with CUBLAS_OP_T (leading dimension = inner).
+//
+// lhs is flattened to (rows x inner): rows is its first dimension, inner is the
+// product of the remaining ones. cols is the last dimension of rhs.
+// Returns a new Tensor* (as void*) of shape (rows, cols, 1, 1) placed on DEVICE.
+void* tensorGemm(void* lhs_ptr, void* rhs_ptr){
+
+  INFO("*** TensorGemm \n");
+  profileEvent("tensorGemm");
+
+  Tensor* left = (Tensor*) lhs_ptr;
+  Tensor* right = (Tensor*) rhs_ptr;
+
+  INFO("rhs->dims.num_dims = %d \n", right->dims.num_dims);
+  INFO("lhs->dims.num_dims = %d \n", left->dims.num_dims);
+
+  // FIXIT: Need to be more aware of the implications of alpha and beta
+  float alpha = 1.0f;
+  float beta = 0.0f;
+
+  // Batch dimension - assuming NCHW format Tensors
+  int rows = left->dims.dim_sizes[0];
+  // Output neurons: the last dimension of rhs
+  int cols = right->dims.dim_sizes[right->dims.num_dims-1];
+
+  // Input neurons: every lhs dimension after the batch one, flattened
+  // NOTE: Allowing any number of dimensions > 2 for lhs
+  int inner = 1;
+  for (int d = 1; d < left->dims.num_dims; d++){
+    inner *= left->dims.dim_sizes[d];
+  }
+
+  // Dimension-note: the second-to-last rhs dimension must match the flattened lhs width
+  int right_inner = right->dims.dim_sizes[right->dims.num_dims-2];
+  INFO("m = %d, n = %d, k = %d \n", rows, cols, inner);
+  if(right_inner != inner){
+    ERROR("rhs=%d and lhs=%d columns/rows don't match", right_inner, inner);
+  }
+
+  // NOTE: A 4D tensor keeps the result compatible with later called cuDNN routines
+  Tensor* result = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, rows, cols, 1, 1);
+  changeTensorPlacement(result, DEVICE);
+
+  hostToDeviceCopy(left);
+  hostToDeviceCopy(right);
+
+  // INFO: cuBlas uses column-major format
+  // INFO: The leading dimension is just the FIRST Dimension
+  // IMP: The output is N * M in column-major format, which is M*N in row-major - what cuDNN expects
+  float* right_data = (float*) right->gpu_data;
+  float* left_data = (float*) left->gpu_data;
+  float* result_data = (float*) result->gpu_data;
+  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N,
+                              cols, rows, inner,
+                              &alpha,
+                              right_data, inner,
+                              left_data, inner,
+                              &beta,
+                              result_data, cols));
+
+  profileEvent("tensorGemm_end", true);
+
+  return result;
+}
+
+
+
+
+// Adds a bias vector to every row of `input`, in place, using a rank-1 cuBLAS GEMM:
+// input = bias * ones + 1.0 * input (beta = 1 keeps the existing contents of input).
+//
+// Parameters:
+//   input_ptr - Tensor* passed as void*; its first dimension is used as the row count
+//   bias_ptr  - Tensor* passed as void*; its last dimension is used as the column count
+// Returns: input_ptr's tensor, updated in place.
+//
+// FIXIT: Add dimension check assertions throughout the code
+void* tensorGemmBias(void* input_ptr, void* bias_ptr){
+
+  INFO("*** TensorGemmBias \n");
+  profileEvent("tensorGemmBias");
+
+  Tensor* activations = (Tensor*) input_ptr;
+  Tensor* bias_vec = (Tensor*) bias_ptr;
+
+  // IMP: beta is set to 1 to append to input
+  // C = A * B + Beta * C
+  float alpha = 1.0f;
+  float beta = 1.0f;
+
+  // Batch dimension - assuming NCHW format Tensors
+  int rows = activations->dims.dim_sizes[0];
+  // Output neurons: the last dimension of the bias tensor
+  int cols = bias_vec->dims.dim_sizes[bias_vec->dims.num_dims - 1];
+
+  INFO("m = %d, n = %d \n", rows, cols);
+
+  hostToDeviceCopy(activations);
+  hostToDeviceCopy(bias_vec);
+
+  // Vector of ones with one entry per row; multiplying by it replicates the bias per row
+  struct Tensor* ones = (Tensor*) create2DTensor(CUDNN_DATA_FLOAT, rows, 1);
+  fillOnes(ones);
+  hostToDeviceCopy(ones);
+
+  // INFO: cuBlas uses column-major format
+  // INFO: The leading dimension is just the FIRST Dimension
+  // CONFUSION: Not sure when to transpose and when not to
+  float* bias_data = (float*) bias_vec->gpu_data;
+  float* ones_data = (float*) ones->gpu_data;
+  float* act_data = (float*) activations->gpu_data;
+  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
+                              cols, rows, 1,
+                              &alpha,
+                              bias_data, cols,
+                              ones_data, 1,
+                              &beta,
+                              act_data, cols));
+
+  profileEvent("tensorGemmBias_end", true);
+
+  return activations;
+}
+
+
+// Applies cuDNN ReLU activation to `input_ptr` in place (source and destination
+// of cudnnActivationForward are the same tensor) and returns the same tensor.
+//
+// Parameters:
+//   input_ptr - Tensor* passed as void*
+// Returns: input_ptr's tensor, updated in place.
+void* tensorRelu(void* input_ptr){
+
+  INFO("*** TensorRelu \n");
+  profileEvent("tensorRelu");
+
+  Tensor* input = (Tensor*) input_ptr;
+
+  cudnnActivationDescriptor_t reluDesc;
+  float alpha = 1.0f, beta = 0.0f;
+  hostToDeviceCopy(input);
+
+  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
+
+  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_RELU,
+                                          CUDNN_PROPAGATE_NAN, 0.0));
+
+  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
+                                    input->tensor_desc, input->gpu_data, &beta,
+                                    input->tensor_desc, input->gpu_data));
+
+  profileEvent("tensorRelu_end", true);
+
+  // The descriptor is only needed for the call above; release it so that
+  // repeated invocations do not leak cuDNN descriptors.
+  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
+
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+
+  // Error injection: apply tensorAddError to the result using this op's accuracy
+  // setting, then record the norms and activation overheads.
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(input, op_acc);
+  add_norms(error_norms, "tensorRelu", op_acc);
+  add_relu_overheads(input, op_acc);
+  op_counter++;
+  #endif
+
+
+  return input;
+}
+
+
+// Applies cuDNN softmax (CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL) to the
+// tensor in place, then calls deviceToHostCopy on it before returning it.
+//
+// Parameters:
+//   input_ptr - Tensor* passed as void*
+// Returns: input_ptr's tensor, updated in place.
+//
+// Think: Should Softmax be broken into individual IR operations?
+void* tensorSoftmax(void* input_ptr){
+
+  INFO("*** TensorSoftmax \n");
+  profileEvent("tensorSoftmax");
+
+  Tensor* logits = (Tensor*) input_ptr;
+
+  float alpha = 1.0f;
+  float beta = 0.0f;
+  hostToDeviceCopy(logits);
+
+  // IMP: CUDNN_SOFTMAX_ACCURATE can be swapped for the less accurate but faster CUDNN_SOFTMAX_FAST
+  // However, not sure if the Softmax overhead is even a concern
+  // TODO: Figure out if mode should be MODE_CHANNEL or MODE_INSTANCE?
+  const cudnnSoftmaxAlgorithm_t algo = CUDNN_SOFTMAX_ACCURATE;
+  const cudnnSoftmaxMode_t mode = CUDNN_SOFTMAX_MODE_CHANNEL;
+  checkCUDNN(cudnnSoftmaxForward(cudnnHandle, algo, mode,
+                                 &alpha, logits->tensor_desc, logits->gpu_data, &beta,
+                                 logits->tensor_desc, logits->gpu_data));
+
+  deviceToHostCopy(logits);
+  profileEvent("tensorSoftmax_end", true);
+
+  return logits;
+}
+
+
+
+// Kernel: clamps A[idx] to at least `min` and then to at most `max`, where idx is
+// the thread's global 1D index. Threads whose index is >= n do nothing.
+__global__ void clipValues(float* A, float min, float max, int n){
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if(idx >= n){
+    return;
+  }
+
+  // Raise to the lower bound first, then cap at the upper bound
+  const float raised = fmaxf(min, A[idx]);
+  A[idx] = fminf(max, raised);
+}
+
+
+
+// Applies cuDNN clipped ReLU (CUDNN_ACTIVATION_CLIPPED_RELU) to `input_ptr` in
+// place and returns the same tensor.
+//
+// Parameters:
+//   input_ptr - Tensor* passed as void*
+//   min       - not used by this implementation: the cuDNN activation takes only
+//               a single coefficient (the ceiling)
+//   max       - clipping ceiling, forwarded to cuDNN as the activation coefficient
+//               (previously a hard-coded 2.0 was used regardless of this argument)
+// Returns: input_ptr's tensor, updated in place.
+void* tensorRelu2(void* input_ptr, float min, float max){
+
+  INFO("*** TensorClippedRelu *** \n");
+  profileEvent("tensorClippedRelu");
+
+  cudnnActivationDescriptor_t reluDesc;
+  float alpha = 1.0f, beta = 0.0f;
+
+  Tensor* input = (Tensor*) input_ptr;
+  hostToDeviceCopy(input);
+
+
+  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
+
+  // Honour the caller-supplied ceiling instead of a fixed constant
+  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU,
+                                          CUDNN_PROPAGATE_NAN, max));
+
+  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
+                                    input->tensor_desc, input->gpu_data, &beta,
+                                    input->tensor_desc, input->gpu_data));
+
+  profileEvent("tensorClippedRelu_end", true);
+
+  // The descriptor is only needed for the call above; release it so that
+  // repeated invocations do not leak cuDNN descriptors.
+  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
+
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+
+  // Error injection: apply tensorAddError to the result using this op's accuracy
+  // setting, then record the norms and activation overheads.
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(input, op_acc);
+  add_norms(error_norms, "tensorClippedRelu", op_acc);
+  add_relu_overheads(input, op_acc);
+  op_counter++;
+  #endif
+
+
+  return input;
+}
+
+
+// Applies cuDNN tanh activation to `input_ptr` in place (source and destination
+// of cudnnActivationForward are the same tensor) and returns the same tensor.
+//
+// Parameters:
+//   input_ptr - Tensor* passed as void*
+// Returns: input_ptr's tensor, updated in place.
+void* tensorTanh(void* input_ptr){
+
+  INFO("*** TensorTanh \n");
+  profileEvent("tensorTanh");
+
+  Tensor* input = (Tensor*) input_ptr;
+
+  cudnnActivationDescriptor_t tanhDesc;
+  float alpha = 1.0f, beta = 0.0f;
+  hostToDeviceCopy(input);
+
+  checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc));
+
+  checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH,
+                                          CUDNN_PROPAGATE_NAN, 0.0));
+
+  checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha,
+                                    input->tensor_desc, input->gpu_data, &beta,
+                                    input->tensor_desc, input->gpu_data));
+
+  profileEvent("tensorTanh_end", true);
+
+  // The descriptor is only needed for the call above; release it so that
+  // repeated invocations do not leak cuDNN descriptors.
+  checkCUDNN(cudnnDestroyActivationDescriptor(tanhDesc));
+
+
+  #ifdef ERROR_INJECTION_ENABLED
+
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+
+  // Error injection: apply tensorAddError to the result using this op's accuracy
+  // setting, then record the norms. NOTE(review): overheads are recorded through
+  // add_relu_overheads - confirm this is intended for tanh.
+  int op_acc = op_accuracies[op_counter];
+  void* error_norms = tensorAddError(input, op_acc);
+  add_norms(error_norms, "tensorTanh", op_acc);
+  add_relu_overheads(input, op_acc);
+  op_counter++;
+  #endif
+
+
+  return input;
+}
+
+
+