From 008c7b697d3498f6abc99c7f85a1aca56decaa8d Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Tue, 12 Jan 2021 08:45:09 -0600
Subject: [PATCH] Update tensor_runtime.ll and delete tensor_runtime_back.ll

---
 .../hpvm-tensor-rt/lib/tensor_runtime.ll      | 149 ++++++------
 .../hpvm-tensor-rt/lib/tensor_runtime_back.ll | 229 ------------------
 2 files changed, 73 insertions(+), 305 deletions(-)
 delete mode 100644 hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime_back.ll

diff --git a/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll b/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
index efef5f2c2f..e0452ac7ff 100644
--- a/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
+++ b/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
@@ -1,12 +1,12 @@
-; ModuleID = '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt//lib/tensor_runtime.bc'
-source_filename = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt//tensor_runtime/include/tensor_signatures.cc"
+; ModuleID = '/home/evanzhao/UIUC/approxtuner/hpvm/hpvm/llvm/tools/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc'
+source_filename = "/home/evanzhao/UIUC/approxtuner/hpvm/hpvm/llvm/tools/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 %"class.std::ios_base::Init" = type { i8 }
 
 @_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
-@__dso_handle = external global i8
+@__dso_handle = external hidden global i8
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_tensor_signatures.cc, i8* null }]
 
 ; Function Attrs: noinline uwtable
@@ -17,16 +17,16 @@ entry:
   ret void
 }
 
-declare void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1
+declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1
 
 ; Function Attrs: nounwind
-declare void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #2
+declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #2
 
 ; Function Attrs: nounwind
-declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #3
+declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #3
 
-; Function Attrs: noinline nounwind uwtable
-define void @_Z13dummyFunctionv() #4 {
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @_Z13dummyFunctionv() #4 {
 entry:
   %initRT = alloca i8*, align 8
   %cleanRT = alloca i8*, align 8
@@ -67,10 +67,7 @@ entry:
   %FCLayer = alloca i8*, align 8
   %ConvLayer_ = alloca i8*, align 8
   %ConvLayer2 = alloca i8*, align 8
-<<<<<<< HEAD
-=======
   %GroupConvLayer = alloca i8*, align 8
->>>>>>> 2a31471f4de89edd9180689d139be7ca65b0df08
   %FCLayer2 = alloca i8*, align 8
   %AddWrapper = alloca i8*, align 8
   %ReluWrapper = alloca i8*, align 8
@@ -126,10 +123,7 @@ entry:
   store i8* bitcast (i8* (i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32)* @FCLayer_PROMISE to i8*), i8** %FCLayer, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float)* @wrapper_ConvLayer to i8*), i8** %ConvLayer_, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float)* @wrapper_ConvLayer2 to i8*), i8** %ConvLayer2, align 8
-<<<<<<< HEAD
-=======
   store i8* bitcast (i8* (i8*, i8*, i8*, i32, i32, i32, i32, i32, i32)* @wrapper_tensorGroupConvolution to i8*), i8** %GroupConvLayer, align 8
->>>>>>> 2a31471f4de89edd9180689d139be7ca65b0df08
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, float, float)* @wrapper_FCLayer to i8*), i8** %FCLayer2, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorAdd to i8*), i8** %AddWrapper, align 8
   store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorRelu to i8*), i8** %ReluWrapper, align 8
@@ -149,117 +143,117 @@ entry:
   ret void
 }
 
-declare void @llvm_hpvm_initTensorRt(i32) #1
+declare dso_local void @llvm_hpvm_initTensorRt(i32) #1
 
-declare void @llvm_hpvm_cleanupTensorRt() #1
+declare dso_local void @llvm_hpvm_cleanupTensorRt() #1
 
-declare void @llvm_hpvm_initApproxhpvmRt(i32) #1
+declare dso_local void @llvm_hpvm_initApproxhpvmRt(i32) #1
 
-declare void @llvm_hpvm_cleanupApproxhpvmRt() #1
+declare dso_local void @llvm_hpvm_cleanupApproxhpvmRt() #1
 
-declare void @llvm_hpvm_initializeRuntimeController(i8*, i8*) #1
+declare dso_local void @llvm_hpvm_initializeRuntimeController(i8*, i8*) #1
 
-declare void @llvm_hpvm_clearRuntimeController() #1
+declare dso_local void @llvm_hpvm_clearRuntimeController() #1
 
-declare void @hpvm_request_tensor(i8*, i32) #1
+declare dso_local void @hpvm_request_tensor(i8*, i32) #1
 
-declare void @startProfiling() #1
+declare dso_local void @startProfiling() #1
 
-declare void @stopProfiling() #1
+declare dso_local void @stopProfiling() #1
 
-declare i8* @create2DTensor(i32, i64, i64) #1
+declare dso_local i8* @create2DTensor(i32, i64, i64) #1
 
-declare i8* @create3DTensor(i32, i64, i64, i64) #1
+declare dso_local i8* @create3DTensor(i32, i64, i64, i64) #1
 
-declare i8* @create4DTensor(i32, i32, i64, i64, i64, i64) #1
+declare dso_local i8* @create4DTensor(i32, i32, i64, i64, i64, i64) #1
 
-declare void @initTensorData(i8*, i8*, i64) #1
+declare dso_local void @initTensorData(i8*, i8*, i64) #1
 
-declare i8** @tensorSplit(i8*, i32, i32) #1
+declare dso_local i8** @tensorSplit(i8*, i32, i32) #1
 
-declare i8* @tensorConcat(i8**, i32, i32) #1
+declare dso_local i8* @tensorConcat(i8**, i32, i32) #1
 
-declare i8* @tensorConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @tensorConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @tensorHalfConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @tensorHalfConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @tensorPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @tensorPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @tensorHalfPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @tensorHalfPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @tensorLRN(i8*, i32, double, double, double) #1
+declare dso_local i8* @tensorLRN(i8*, i32, double, double, double) #1
 
-declare i8* @tensorGemm(i8*, i8*) #1
+declare dso_local i8* @tensorGemm(i8*, i8*) #1
 
-declare i8* @tensorGemmCPU(i8*, i8*) #1
+declare dso_local i8* @tensorGemmCPU(i8*, i8*) #1
 
-declare i8* @tensorGemmGPU(i8*, i8*) #1
+declare dso_local i8* @tensorGemmGPU(i8*, i8*) #1
 
-declare i8* @tensorHalfGemm(i8*, i8*) #1
+declare dso_local i8* @tensorHalfGemm(i8*, i8*) #1
 
-declare i8* @tensorGemmBias(i8*, i8*) #1
+declare dso_local i8* @tensorGemmBias(i8*, i8*) #1
 
-declare i8* @tensorAdd(i8*, i8*) #1
+declare dso_local i8* @tensorAdd(i8*, i8*) #1
 
-declare i8* @tensorHalfAdd(i8*, i8*) #1
+declare dso_local i8* @tensorHalfAdd(i8*, i8*) #1
 
-declare i8* @tensorRelu(i8*) #1
+declare dso_local i8* @tensorRelu(i8*) #1
 
-declare i8* @tensorRelu2(i8*, float, float) #1
+declare dso_local i8* @tensorRelu2(i8*, float, float) #1
 
-declare i8* @tensorHalfRelu2(i8*, float, float) #1
+declare dso_local i8* @tensorHalfRelu2(i8*, float, float) #1
 
-declare i8* @tensorTanh(i8*) #1
+declare dso_local i8* @tensorTanh(i8*) #1
 
-declare i8* @tensorHalfTanh(i8*) #1
+declare dso_local i8* @tensorHalfTanh(i8*) #1
 
-declare i8* @tensorSoftmax(i8*) #1
+declare dso_local i8* @tensorSoftmax(i8*) #1
 
-declare i8* @tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, double) #1
+declare dso_local i8* @tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, double) #1
 
-declare i8* @tensorAddError(i8*, i32) #1
+declare dso_local i8* @tensorAddError(i8*, i32) #1
 
-declare i8* @ConvLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, i32, i32, i32, i32, i32, i32, float, float, i32) #1
+declare dso_local i8* @ConvLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, i32, i32, i32, i32, i32, i32, float, float, i32) #1
 
-declare i8* @FCLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32) #1
+declare dso_local i8* @FCLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32) #1
 
-declare i8* @wrapper_ConvLayer(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float) #1
+declare dso_local i8* @wrapper_ConvLayer(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float) #1
 
-declare i8* @wrapper_ConvLayer2(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float) #1
+declare dso_local i8* @wrapper_ConvLayer2(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float) #1
 
-declare i8* @wrapper_tensorGroupConvolution(i8*, i8*, i8*, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @wrapper_tensorGroupConvolution(i8*, i8*, i8*, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @wrapper_FCLayer(i8*, i8*, i8*, i8*, i32, float, float) #1
+declare dso_local i8* @wrapper_FCLayer(i8*, i8*, i8*, i8*, i32, float, float) #1
 
-declare i8* @wrapper_tensorAdd(i8*, i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorAdd(i8*, i8*, i8*) #1
 
-declare i8* @wrapper_tensorRelu(i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorRelu(i8*, i8*) #1
 
-declare i8* @wrapper_tensorTanh(i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorTanh(i8*, i8*) #1
 
-declare i8* @wrapper_tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, i8*, double) #1
+declare dso_local i8* @wrapper_tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, i8*, double) #1
 
-declare i8* @wrapper_tensorPooling(i8*, i8*, i32, i32, i32, i32, i32, i32, i32) #1
+declare dso_local i8* @wrapper_tensorPooling(i8*, i8*, i32, i32, i32, i32, i32, i32, i32) #1
 
-declare i8* @wrapper_tensorSoftmax(i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorSoftmax(i8*, i8*) #1
 
-declare i8* @wrapper_tensorFft(i8*, i8*, i1 zeroext) #1
+declare dso_local i8* @wrapper_tensorFft(i8*, i8*, i1 zeroext) #1
 
-declare i8* @wrapper_tensorReduce(i8*, i8*, i32, i32) #1
+declare dso_local i8* @wrapper_tensorReduce(i8*, i8*, i32, i32) #1
 
-declare i8* @wrapper_tensorProjectiveT(i8*, i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorProjectiveT(i8*, i8*, i8*) #1
 
-declare i8* @wrapper_tensorMap1(i8*, i32, i8*) #1
+declare dso_local i8* @wrapper_tensorMap1(i8*, i32, i8*) #1
 
-declare i8* @wrapper_tensorMap2(i8*, i32, i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorMap2(i8*, i32, i8*, i8*) #1
 
-declare i8* @wrapper_tensorMap3(i8*, i32, i8*, i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorMap3(i8*, i32, i8*, i8*, i8*) #1
 
-declare i8* @wrapper_tensorStencil(i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorStencil(i8*, i8*) #1
 
-declare i8* @wrapper_tensorCosineT(i8*, i8*) #1
+declare dso_local i8* @wrapper_tensorCosineT(i8*, i8*) #1
 
-declare i8* @tensor_set_node_id(i32) #1
+declare dso_local i8* @tensor_set_node_id(i32) #1
 
 ; Function Attrs: noinline uwtable
 define internal void @_GLOBAL__sub_I_tensor_signatures.cc() #0 section ".text.startup" {
@@ -268,12 +262,15 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #3 = { nounwind }
-attributes #4 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!llvm.ident = !{!0}
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (git@gitlab.engr.illinois.edu:llvm/hpvm.git 45dcc405b732c5560328fb5be2290dfa81961217)"}
 
-!0 = !{!"clang version 4.0.1 "}
diff --git a/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime_back.ll b/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime_back.ll
deleted file mode 100644
index 1852fc070f..0000000000
--- a/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime_back.ll
+++ /dev/null
@@ -1,229 +0,0 @@
-; ModuleID = 'lib/tensor_runtime.bc'
-source_filename = "tensor_runtime/include/tensor_signatures.cc"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: noinline nounwind uwtable
-define void @_Z13dummyFunctionv() #0 {
-entry:
-  %initRT = alloca i8*, align 8
-  %cleanRT = alloca i8*, align 8
-  %initApproxRT = alloca i8*, align 8
-  %cleanApproxRT = alloca i8*, align 8
-  %initRTController = alloca i8*, align 8
-  %cleanRTController = alloca i8*, align 8
-  %request_tensorPtr = alloca i8*, align 8
-  %startProf = alloca i8*, align 8
-  %stopProf = alloca i8*, align 8
-  %create2Dptr = alloca i8*, align 8
-  %create3Dptr = alloca i8*, align 8
-  %create4Dptr = alloca i8*, align 8
-  %initTensorPtr = alloca i8*, align 8
-  %tensorSplitPtr = alloca i8*, align 8
-  %tensorConcatPtr = alloca i8*, align 8
-  %tensorConvPtr = alloca i8*, align 8
-  %tensorHConvPtr = alloca i8*, align 8
-  %tensorPoolPtr = alloca i8*, align 8
-  %tensorHalfPoolPtr = alloca i8*, align 8
-  %tensorLRNPtr = alloca i8*, align 8
-  %tensorGemmPr = alloca i8*, align 8
-  %tensorGemmCPUPtr = alloca i8*, align 8
-  %tensorGemmGPUPtr = alloca i8*, align 8
-  %tensorHgemmPtr = alloca i8*, align 8
-  %tensorGemmBiasPtr = alloca i8*, align 8
-  %tensorAddPtr = alloca i8*, align 8
-  %tensorHalfAddPtr = alloca i8*, align 8
-  %tensorReluPtr = alloca i8*, align 8
-  %tensorRelu2Ptr = alloca i8*, align 8
-  %tensorHalfRelu2Ptr = alloca i8*, align 8
-  %tensorTanhPtr = alloca i8*, align 8
-  %tensorHalfTanhPtr = alloca i8*, align 8
-  %tensorSoftmaxPtr = alloca i8*, align 8
-  %tensorBatchNormPtr = alloca i8*, align 8
-  %tensorAddErrorPtr = alloca i8*, align 8
-  %ConvLayer = alloca i8*, align 8
-  %FCLayer = alloca i8*, align 8
-  %ConvLayer2 = alloca i8*, align 8
-  %FCLayer2 = alloca i8*, align 8
-  %AddWrapper = alloca i8*, align 8
-  %ReluWrapper = alloca i8*, align 8
-  %TanhWrapper = alloca i8*, align 8
-  %BatchNormWrapper = alloca i8*, align 8
-  %PoolingWrapper = alloca i8*, align 8
-  %softmaxWrapper = alloca i8*, align 8
-  %tensorFft = alloca i8*, align 8
-  %tensorReduce = alloca i8*, align 8
-  %tensorProjectiveT = alloca i8*, align 8
-  %tensorMap1 = alloca i8*, align 8
-  %tensorMap2 = alloca i8*, align 8
-  %tensorMap3 = alloca i8*, align 8
-  %tensorStencil = alloca i8*, align 8
-  %tensorCosineT = alloca i8*, align 8
-  store i8* bitcast (void (i32)* @llvm_hpvm_initTensorRt to i8*), i8** %initRT, align 8
-  store i8* bitcast (void ()* @llvm_hpvm_cleanupTensorRt to i8*), i8** %cleanRT, align 8
-  store i8* bitcast (void (i32)* @llvm_hpvm_initApproxhpvmRt to i8*), i8** %initApproxRT, align 8
-  store i8* bitcast (void ()* @llvm_hpvm_cleanupApproxhpvmRt to i8*), i8** %cleanApproxRT, align 8
-  store i8* bitcast (void (i8*, i8*)* @llvm_hpvm_initializeRuntimeController to i8*), i8** %initRTController, align 8
-  store i8* bitcast (void ()* @llvm_hpvm_clearRuntimeController to i8*), i8** %cleanRTController, align 8
-  store i8* bitcast (void (i8*, i32)* @hpvm_request_tensor to i8*), i8** %request_tensorPtr, align 8
-  store i8* bitcast (void ()* @startProfiling to i8*), i8** %startProf, align 8
-  store i8* bitcast (void ()* @stopProfiling to i8*), i8** %stopProf, align 8
-  store i8* bitcast (i8* (i32, i64, i64)* @create2DTensor to i8*), i8** %create2Dptr, align 8
-  store i8* bitcast (i8* (i32, i64, i64, i64)* @create3DTensor to i8*), i8** %create3Dptr, align 8
-  store i8* bitcast (i8* (i32, i32, i64, i64, i64, i64)* @create4DTensor to i8*), i8** %create4Dptr, align 8
-  store i8* bitcast (void (i8*, i8*, i64)* @initTensorData to i8*), i8** %initTensorPtr, align 8
-  store i8* bitcast (i8** (i8*, i32, i32)* @tensorSplit to i8*), i8** %tensorSplitPtr, align 8
-  store i8* bitcast (i8* (i8**, i32, i32)* @tensorConcat to i8*), i8** %tensorConcatPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*, i32, i32, i32, i32, i32, i32)* @tensorConvolution to i8*), i8** %tensorConvPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*, i32, i32, i32, i32, i32, i32)* @tensorHalfConvolution to i8*), i8** %tensorHConvPtr, align 8
-  store i8* bitcast (i8* (i8*, i32, i32, i32, i32, i32, i32, i32)* @tensorPooling to i8*), i8** %tensorPoolPtr, align 8
-  store i8* bitcast (i8* (i8*, i32, i32, i32, i32, i32, i32, i32)* @tensorHalfPooling to i8*), i8** %tensorHalfPoolPtr, align 8
-  store i8* bitcast (i8* (i8*, i32, double, double, double)* @tensorLRN to i8*), i8** %tensorLRNPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorGemm to i8*), i8** %tensorGemmPr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorGemmCPU to i8*), i8** %tensorGemmCPUPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorGemmGPU to i8*), i8** %tensorGemmGPUPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorHalfGemm to i8*), i8** %tensorHgemmPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorGemmBias to i8*), i8** %tensorGemmBiasPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorAdd to i8*), i8** %tensorAddPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @tensorHalfAdd to i8*), i8** %tensorHalfAddPtr, align 8
-  store i8* bitcast (i8* (i8*)* @tensorRelu to i8*), i8** %tensorReluPtr, align 8
-  store i8* bitcast (i8* (i8*, float, float)* @tensorRelu2 to i8*), i8** %tensorRelu2Ptr, align 8
-  store i8* bitcast (i8* (i8*, float, float)* @tensorHalfRelu2 to i8*), i8** %tensorHalfRelu2Ptr, align 8
-  store i8* bitcast (i8* (i8*)* @tensorTanh to i8*), i8** %tensorTanhPtr, align 8
-  store i8* bitcast (i8* (i8*)* @tensorHalfTanh to i8*), i8** %tensorHalfTanhPtr, align 8
-  store i8* bitcast (i8* (i8*)* @tensorSoftmax to i8*), i8** %tensorSoftmaxPtr, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i8*, double)* @tensorBatchNorm to i8*), i8** %tensorBatchNormPtr, align 8
-  store i8* bitcast (i8* (i8*, i32)* @tensorAddError to i8*), i8** %tensorAddErrorPtr, align 8
-  store i8* bitcast (i8* (i8*, float, float, i8*, float, float, i8*, float, float, i32, i32, i32, i32, i32, i32, i32, float, float, i32)* @ConvLayer_PROMISE to i8*), i8** %ConvLayer, align 8
-  store i8* bitcast (i8* (i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32)* @FCLayer_PROMISE to i8*), i8** %FCLayer, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float)* @wrapper_ConvLayer to i8*), i8** %ConvLayer2, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, float, float)* @wrapper_FCLayer to i8*), i8** %FCLayer2, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorAdd to i8*), i8** %AddWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorRelu to i8*), i8** %ReluWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorTanh to i8*), i8** %TanhWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i8*, i8*, double)* @wrapper_tensorBatchNorm to i8*), i8** %BatchNormWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)* @wrapper_tensorPooling to i8*), i8** %PoolingWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorSoftmax to i8*), i8** %softmaxWrapper, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorFft to i8*), i8** %tensorFft, align 8
-  store i8* bitcast (i8* (i8*, i8*, i32, i8*)* @wrapper_tensorReduce to i8*), i8** %tensorReduce, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorProjectiveT to i8*), i8** %tensorProjectiveT, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorMap1 to i8*), i8** %tensorMap1, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*)* @wrapper_tensorMap2 to i8*), i8** %tensorMap2, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i8*)* @wrapper_tensorMap3 to i8*), i8** %tensorMap3, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorStencil to i8*), i8** %tensorStencil, align 8
-  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorCosineT to i8*), i8** %tensorCosineT, align 8
-  ret void
-}
-
-declare void @llvm_hpvm_initTensorRt(i32) #1
-
-declare void @llvm_hpvm_cleanupTensorRt() #1
-
-declare void @llvm_hpvm_initApproxhpvmRt(i32) #1
-
-declare void @llvm_hpvm_cleanupApproxhpvmRt() #1
-
-declare void @llvm_hpvm_initializeRuntimeController(i8*, i8*) #1
-
-declare void @llvm_hpvm_clearRuntimeController() #1
-
-declare void @hpvm_request_tensor(i8*, i32) #1
-
-declare void @startProfiling() #1
-
-declare void @stopProfiling() #1
-
-declare i8* @create2DTensor(i32, i64, i64) #1
-
-declare i8* @create3DTensor(i32, i64, i64, i64) #1
-
-declare i8* @create4DTensor(i32, i32, i64, i64, i64, i64) #1
-
-declare void @initTensorData(i8*, i8*, i64) #1
-
-declare i8** @tensorSplit(i8*, i32, i32) #1
-
-declare i8* @tensorConcat(i8**, i32, i32) #1
-
-declare i8* @tensorConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
-
-declare i8* @tensorHalfConvolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #1
-
-declare i8* @tensorPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare i8* @tensorHalfPooling(i8*, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare i8* @tensorLRN(i8*, i32, double, double, double) #1
-
-declare i8* @tensorGemm(i8*, i8*) #1
-
-declare i8* @tensorGemmCPU(i8*, i8*) #1
-
-declare i8* @tensorGemmGPU(i8*, i8*) #1
-
-declare i8* @tensorHalfGemm(i8*, i8*) #1
-
-declare i8* @tensorGemmBias(i8*, i8*) #1
-
-declare i8* @tensorAdd(i8*, i8*) #1
-
-declare i8* @tensorHalfAdd(i8*, i8*) #1
-
-declare i8* @tensorRelu(i8*) #1
-
-declare i8* @tensorRelu2(i8*, float, float) #1
-
-declare i8* @tensorHalfRelu2(i8*, float, float) #1
-
-declare i8* @tensorTanh(i8*) #1
-
-declare i8* @tensorHalfTanh(i8*) #1
-
-declare i8* @tensorSoftmax(i8*) #1
-
-declare i8* @tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, double) #1
-
-declare i8* @tensorAddError(i8*, i32) #1
-
-declare i8* @ConvLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, i32, i32, i32, i32, i32, i32, float, float, i32) #1
-
-declare i8* @FCLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32) #1
-
-declare i8* @wrapper_ConvLayer(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float) #1
-
-declare i8* @wrapper_FCLayer(i8*, i8*, i8*, i8*, i32, float, float) #1
-
-declare i8* @wrapper_tensorAdd(i8*, i8*, i8*) #1
-
-declare i8* @wrapper_tensorRelu(i8*, i8*) #1
-
-declare i8* @wrapper_tensorTanh(i8*, i8*) #1
-
-declare i8* @wrapper_tensorBatchNorm(i8*, i8*, i8*, i8*, i8*, i8*, double) #1
-
-declare i8* @wrapper_tensorPooling(i8*, i8*, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare i8* @wrapper_tensorSoftmax(i8*, i8*) #1
-
-declare i8* @wrapper_tensorFft(i8*, i8*) #1
-
-declare i8* @wrapper_tensorReduce(i8*, i8*, i32, i8*) #1
-
-declare i8* @wrapper_tensorProjectiveT(i8*, i8*, i8*) #1
-
-declare i8* @wrapper_tensorMap1(i8*, i8*, i8*) #1
-
-declare i8* @wrapper_tensorMap2(i8*, i8*, i8*, i8*) #1
-
-declare i8* @wrapper_tensorMap3(i8*, i8*, i8*, i8*, i8*) #1
-
-declare i8* @wrapper_tensorStencil(i8*, i8*) #1
-
-declare i8* @wrapper_tensorCosineT(i8*, i8*) #1
-
-attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 4.0.1 "}
-- 
GitLab