diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..9454a1b4ceea6cb3cf23efa171badb3bcbfefd72
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile
@@ -0,0 +1,62 @@
+# Builds the LeNet DNN benchmark: clang++ emits LLVM IR, opt runs the VISC
+# passes, and the result is linked against the VISC and tensor runtimes.
+# LLVM_SRC_ROOT and CUDA_INCLUDE_PATH are not defined in this file; they must
+# come from the environment or the make command line.
+DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
+CC = $(LLVM_SRC_ROOT)/../build/bin/clang++
+OPT = $(LLVM_SRC_ROOT)/../build/bin/opt
+LLVM_DIS = $(LLVM_SRC_ROOT)/../build/bin/llvm-dis
+LLVM_LINK = $(LLVM_SRC_ROOT)/../build/bin/llvm-link
+LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
+
+SRC_DIR = src
+BUILD_DIR = build
+APP = lenet
+
+TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
+TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
+# Despite the _DIR suffix this is the path of the static library itself; it is
+# passed directly on the final link line.
+TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/build/libtensor_runtime.a
+
+CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH)  -fno-exceptions -ffast-math -std=c++11 -O3
+# Append to CC_FLAGS, the variable the compile rule uses. This line previously
+# appended to an unrelated CCFLAGS variable, so -DDEVICE never reached clang++.
+CC_FLAGS += -DDEVICE=CUDNN_TARGET
+LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL
+
+VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG
+
+TARGET = $(BUILD_DIR)/$(APP).opt.bc
+SOURCES = $(SRC_DIR)/$(APP).cpp
+VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
+
+#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
+.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
+# default and clean name actions, not files.
+.PHONY: default clean
+default: $(BUILD_DIR) $(TARGET)
+
+
+# C++ source -> textual LLVM IR. The order-only prerequisite guarantees the
+# output directory exists even when this target is requested directly.
+$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp | $(BUILD_DIR)
+	$(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $<
+
+#-visc-timers-gen
+# Run the genvisc pass followed by globaldce on the frontend IR.
+$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll
+	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce  $< -S -o $@
+
+# Apply the VISC_OPTFLAGS pass pipeline, link in the VISC runtime, and build
+# the executable $(BUILD_DIR)/<stem>_linked ($* is the pattern stem, e.g. lenet).
+$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll
+	$(OPT) $(VISC_OPTFLAGS) $< -o $@
+	$(LLVM_LINK) $@ $(VISC_RT_PATH) -o $(BUILD_DIR)/$*_linked.bc
+	$(CC) $(BUILD_DIR)/$*_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$*_linked $(LINKER_FLAGS)
+
+$(BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -rf $(BUILD_DIR)
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.ll b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c07d88c871464bc6b8f48becbcd12b0698c34cbf
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.ll
@@ -0,0 +1,2645 @@
+; ModuleID = 'src/lenet.cpp'
+source_filename = "src/lenet.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"class.std::ios_base::Init" = type { i8 }  ; type of the @_ZStL8__ioinit global defined below
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%struct.Tensor = type { i32, i32, i32, %struct.cudnnTensorStruct*, %struct.cudnnFilterStruct*, i8*, i8*, i64, i64, %struct.Dimension }  ; the functions below address this through i8* at fixed byte offsets: 0 = data_type, 32 = host_data, 40 = gpu_data, 48 = num_elems, 56 = size_in_bytes, 64 = dims (names taken from the IR value names)
+%struct.cudnnTensorStruct = type opaque  ; opaque: layout is not visible to this module
+%struct.cudnnFilterStruct = type opaque  ; opaque: layout is not visible to this module
+%struct.Dimension = type { i32, i64* }  ; read as { num_dims, dim_sizes } by @_Z15printTensorInfoPv (byte offsets 64 and 72 of %struct.Tensor)
+%"class.std::__cxx11::basic_ostringstream" = type { %"class.std::basic_ostream.base", %"class.std::__cxx11::basic_stringbuf", %"class.std::basic_ios" }
+%"class.std::basic_ostream.base" = type { i32 (...)** }
+%"class.std::__cxx11::basic_stringbuf" = type { %"class.std::basic_streambuf", i32, %"class.std::__cxx11::basic_string" }
+%"class.std::basic_streambuf" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale" }
+%"class.std::locale" = type { %"class.std::locale::_Impl"* }
+%"class.std::locale::_Impl" = type { i32, %"class.std::locale::facet"**, i64, %"class.std::locale::facet"**, i8** }
+%"class.std::locale::facet" = type <{ i32 (...)**, i32, [4 x i8] }>
+%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider", i64, %union.anon }
+%"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8, %"class.std::basic_streambuf"*, %"class.std::ctype"*, %"class.std::num_put"*, %"class.std::num_get"* }
+%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"class.std::locale" }
+%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 }
+%"struct.std::ios_base::_Words" = type { i8*, i64 }
+%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" }
+%"class.std::ctype" = type <{ %"class.std::locale::facet.base", [4 x i8], %struct.__locale_struct*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
+%"class.std::locale::facet.base" = type <{ i32 (...)**, i32 }>
+%struct.__locale_struct = type { [13 x %struct.__locale_data*], i16*, i32*, i32*, [13 x i8*] }
+%struct.__locale_data = type opaque
+%"class.std::num_put" = type { %"class.std::locale::facet.base", [4 x i8] }
+%"class.std::num_get" = type { %"class.std::locale::facet.base", [4 x i8] }
+
+@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1  ; std::ios_base::Init instance; its constructor and destructor are declared right after this table
+@__dso_handle = external global i8
+@.str.1 = private unnamed_addr constant [19 x i8] c"tensor dims = %d \0A\00", align 1  ; .str.1 - .str.4: printf formats used by @_Z15printTensorInfoPv
+@.str.2 = private unnamed_addr constant [18 x i8] c"dim1_size = %zu \0A\00", align 1
+@.str.3 = private unnamed_addr constant [18 x i8] c"dim2_size = %zu \0A\00", align 1
+@.str.4 = private unnamed_addr constant [18 x i8] c"num_elems = %zu \0A\00", align 1
+@.str.5 = private unnamed_addr constant [3 x i8] c"wb\00", align 1  ; .str.5 - .str.8: fopen mode and messages used by @_Z17dumpWeightsToFilePcPv
+@.str.6 = private unnamed_addr constant [58 x i8] c"File %s could not be created. Check if directory exists \0A\00", align 1
+@.str.7 = private unnamed_addr constant [22 x i8] c"size_in_bytes = %zu \0A\00", align 1
+@.str.8 = private unnamed_addr constant [21 x i8] c"bytes_written = %zu\0A\00", align 1
+@.str.9 = private unnamed_addr constant [4 x i8] c"%f,\00", align 1
+@.str.11 = private unnamed_addr constant [18 x i8] c"Num_elems = %zu \0A\00", align 1
+@.str.12 = private unnamed_addr constant [16 x i8] c"dim[%d] = %zu \0A\00", align 1
+@.str.13 = private unnamed_addr constant [35 x i8] c"Tensor data mismatch at index %d \0A\00", align 1
+@.str.14 = private unnamed_addr constant [21 x i8] c"Tensor data mismatch\00", align 1
+@.str.15 = private unnamed_addr constant [3 x i8] c"rb\00", align 1
+@.str.16 = private unnamed_addr constant [41 x i8] c"Data file %s is not found. Aborting... \0A\00", align 1
+@.str.17 = private unnamed_addr constant [23 x i8] c"tensor_data[%d] = %f \0A\00", align 1
+@.str.18 = private unnamed_addr constant [40 x i8] c"Data file %s is not found. Aborting...\0A\00", align 1
+@.str.19 = private unnamed_addr constant [28 x i8] c"--labels bytes_read = %zu \0A\00", align 1
+@.str.20 = private unnamed_addr constant [24 x i8] c"****** Accuracy = %f \0A\0A\00", align 1
+@.str.21 = private unnamed_addr constant [15 x i8] c"final_accuracy\00", align 1
+@.str.22 = private unnamed_addr constant [3 x i8] c"w+\00", align 1
+@.str.23 = private unnamed_addr constant [55 x i8] c"../../../../../../projects/hpvm-tensor-rt/model_params\00", align 1  ; NOTE(review): relative path; presumably resolved against the working directory at run time -- confirm against the (not visible) user of this string
+@.str.24 = private unnamed_addr constant [35 x i8] c"/FC_network2/mnist_float_input.bin\00", align 1
+@.str.25 = private unnamed_addr constant [23 x i8] c"/lenet_keras/conv1.bin\00", align 1
+@.str.26 = private unnamed_addr constant [28 x i8] c"/lenet_keras/conv1_bias.bin\00", align 1
+@.str.27 = private unnamed_addr constant [23 x i8] c"/lenet_keras/conv2.bin\00", align 1
+@.str.28 = private unnamed_addr constant [28 x i8] c"/lenet_keras/conv2_bias.bin\00", align 1
+@.str.29 = private unnamed_addr constant [31 x i8] c"Reading Input Data from = %s \0A\00", align 1
+@_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant { [5 x i8*], [5 x i8*] }
+@_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant [4 x i8*]
+@_ZTVSt9basic_iosIcSt11char_traitsIcEE = external unnamed_addr constant { [4 x i8*] }
+@_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant { [16 x i8*] }
+@_ZTVSt15basic_streambufIcSt11char_traitsIcEE = external unnamed_addr constant { [16 x i8*] }
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_lenet.cpp, i8* null }]  ; registers @_GLOBAL__sub_I_lenet.cpp as a module constructor with priority 65535
+@str = private unnamed_addr constant [23 x i8] c"Successful cudaMalloc \00"  ; message passed to puts() in @_Z15printTensorInfoPv
+
+declare void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #0
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr #2
+
+; Function Attrs: nounwind uwtable -- printTensorInfo(void*): prints "Successful cudaMalloc" when the gpu_data pointer is non-null, then prints num_dims, dim_sizes[0], dim_sizes[1] and num_elems. Fields are read from %tensor_ptr at fixed byte offsets.
+define void @_Z15printTensorInfoPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  %gpu_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 40  ; byte offset 40: the gpu_data pointer slot
+  %0 = bitcast i8* %gpu_data to i8**
+  %1 = load i8*, i8** %0, align 8, !tbaa !1
+  %cmp = icmp eq i8* %1, null
+  br i1 %cmp, label %if.end, label %if.then  ; skip the message when gpu_data is null
+
+if.then:                                          ; preds = %entry
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @str, i64 0, i64 0))  ; emits @str followed by a newline
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %dims = getelementptr inbounds i8, i8* %tensor_ptr, i64 64  ; byte offset 64: start of the dims member, whose first field is num_dims
+  %num_dims = bitcast i8* %dims to i32*
+  %2 = load i32, i32* %num_dims, align 8, !tbaa !10
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.1, i64 0, i64 0), i32 %2)
+  %dim_sizes = getelementptr inbounds i8, i8* %tensor_ptr, i64 72  ; byte offset 72: the dim_sizes pointer (i64*)
+  %3 = bitcast i8* %dim_sizes to i64**
+  %4 = load i64*, i64** %3, align 8, !tbaa !11
+  %5 = load i64, i64* %4, align 8, !tbaa !12  ; dim_sizes[0]
+  %call3 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.2, i64 0, i64 0), i64 %5)
+  %6 = load i64*, i64** %3, align 8, !tbaa !11  ; the dim_sizes pointer is reloaded after the printf call
+  %arrayidx6 = getelementptr inbounds i64, i64* %6, i64 1
+  %7 = load i64, i64* %arrayidx6, align 8, !tbaa !12  ; dim_sizes[1], read unconditionally. NOTE(review): no check that num_dims >= 2 is visible in this function
+  %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.3, i64 0, i64 0), i64 %7)
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48  ; byte offset 48: num_elems (i64)
+  %8 = bitcast i8* %num_elems to i64*
+  %9 = load i64, i64* %8, align 8, !tbaa !13
+  %call8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.4, i64 0, i64 0), i64 %9)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #4
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #4
+
+; Function Attrs: nounwind uwtable -- dumpWeightsToFile(char*, void*): calls hpvm_request_tensor(weights_ptr, 0), opens file_name with mode "wb" (printing a message and aborting if that fails), writes size_in_bytes bytes starting at host_data, prints the byte count returned by fwrite, and closes the file.
+define void @_Z17dumpWeightsToFilePcPv(i8* %file_name, i8* %weights_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %weights_ptr, i32 0) #2  ; NOTE(review): presumably 0 requests the host copy, since host_data is read below -- confirm against the runtime API
+  %call = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.5, i64 0, i64 0))  ; mode string is "wb"
+  %cmp = icmp eq %struct._IO_FILE* %call, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @.str.6, i64 0, i64 0), i8* %file_name)
+  tail call void @abort() #8  ; fopen failed: terminate the process after printing the message
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %size_in_bytes = getelementptr inbounds i8, i8* %weights_ptr, i64 56  ; byte offset 56: size_in_bytes (i64)
+  %0 = bitcast i8* %size_in_bytes to i64*
+  %1 = load i64, i64* %0, align 8, !tbaa !14
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.7, i64 0, i64 0), i64 %1)
+  %host_data = getelementptr inbounds i8, i8* %weights_ptr, i64 32  ; byte offset 32: the host_data pointer slot
+  %2 = bitcast i8* %host_data to i8**
+  %3 = load i8*, i8** %2, align 8, !tbaa !15
+  %4 = load i64, i64* %0, align 8, !tbaa !14  ; size_in_bytes is reloaded after the printf call
+  %call4 = tail call i64 @fwrite(i8* %3, i64 1, i64 %4, %struct._IO_FILE* nonnull %call)  ; fwrite(host_data, 1, size_in_bytes, file)
+  %call5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.8, i64 0, i64 0), i64 %call4)  ; the count is only printed; it is never compared with size_in_bytes, so a short write is not treated as an error
+  %call6 = tail call i32 @fclose(%struct._IO_FILE* nonnull %call)  ; the fclose result is unused
+  ret void
+}
+
+declare void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+declare noalias %struct._IO_FILE* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #1
+
+; Function Attrs: noreturn nounwind
+declare void @abort() local_unnamed_addr #5
+
+; Function Attrs: nounwind
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i32 @fclose(%struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- fillTensorWithOnes(void*): after hpvm_request_tensor(tensor_ptr, 0), stores 1.0f into each of the num_elems floats at host_data, but only when data_type == 0. The vector.* and middle.block blocks are a vectorized, unrolled form of the scalar loop in for.body.
+define void @_Z18fillTensorWithOnesPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*  ; byte offset 0: data_type (i32)
+  %0 = load i32, i32* %data_type, align 8, !tbaa !16
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end  ; any data_type other than 0 returns without writing anything
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32  ; byte offset 32: host_data, treated as float* here
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48  ; byte offset 48: num_elems (i64)
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !13
+  %cmp110 = icmp eq i64 %4, 0
+  br i1 %cmp110, label %if.end, label %for.body.preheader  ; nothing to do for an empty tensor
+
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 8  ; fewer than 8 elements: go straight to the scalar loop
+  br i1 %min.iters.check, label %for.body.preheader22, label %min.iters.checked
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %4, -8  ; num_elems rounded down to a multiple of 8 (the part handled by vector stores)
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader22, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add i64 %4, -1  ; run-time guard: the vector path is taken only if num_elems - 1 fits in the 32-bit loop counter
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader22, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1  ; n.vec / 8 = number of 8-float vector steps
+  %xtraiter = and i64 %12, 7  ; steps left over after grouping by 8; they run first in vector.body.prol
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]  ; prologue loop: one 8-float step (two <4 x float> stores) per iteration
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body.prol ], [ %xtraiter, %vector.body.prol.preheader ]
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %14, align 4, !tbaa !17
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %16, align 4, !tbaa !17
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !19
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.preheader, %vector.body.prol.loopexit.unr-lcssa
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ]
+  %17 = icmp ult i64 %10, 56  ; n.vec < 64 means fewer than 8 vector steps in total, all already done by the prologue
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ]  ; main loop, unrolled 8x: sixteen <4 x float> stores = 64 floats per trip
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %19, align 4, !tbaa !17
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %21, align 4, !tbaa !17
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %23, align 4, !tbaa !17
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %25, align 4, !tbaa !17
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %27, align 4, !tbaa !17
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %29, align 4, !tbaa !17
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %31, align 4, !tbaa !17
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %33, align 4, !tbaa !17
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %35, align 4, !tbaa !17
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %37, align 4, !tbaa !17
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %39, align 4, !tbaa !17
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %41, align 4, !tbaa !17
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %43, align 4, !tbaa !17
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %45, align 4, !tbaa !17
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %47, align 4, !tbaa !17
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %49, align 4, !tbaa !17
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !21
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %4, %n.vec  ; no scalar remainder when num_elems is a multiple of 8
+  br i1 %cmp.n, label %if.end, label %for.body.preheader22
+
+for.body.preheader22:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv12.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]  ; scalar loop starts at n.vec after the vector path, otherwise at 0
+  %i.011.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader22, %for.body
+  %conv12 = phi i64 [ %conv, %for.body ], [ %conv12.ph, %for.body.preheader22 ]  ; scalar loop: one float per iteration
+  %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader22 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv12
+  store float 1.000000e+00, float* %arrayidx, align 4, !tbaa !17
+  %inc = add i32 %i.011, 1  ; 32-bit counter, zero-extended below for the comparison with the 64-bit num_elems
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %4  ; NOTE(review): for num_elems above 2^32 the zero-extended counter can never reach it, so this exit test would always be true -- worth confirming in the C++ source
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !24
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z19fillWithOnesAndTwosPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !16
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !13
+  %div35 = lshr i64 %4, 1
+  %cmp136 = icmp eq i64 %div35, 0
+  br i1 %cmp136, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 16
+  br i1 %min.iters.check, label %for.body.preheader85, label %min.iters.checked
+
+for.body.preheader85:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv38.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %i.037.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %div35, 9223372036854775800
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader85, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add nsw i64 %div35, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader85, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add nsw i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1
+  %xtraiter86 = and i64 %12, 7
+  %lcmp.mod87 = icmp eq i64 %xtraiter86, 0
+  br i1 %lcmp.mod87, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]
+  %prol.iter88 = phi i64 [ %prol.iter88.sub, %vector.body.prol ], [ %xtraiter86, %vector.body.prol.preheader ]
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %14, align 4, !tbaa !17
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %16, align 4, !tbaa !17
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter88.sub = add i64 %prol.iter88, -1
+  %prol.iter88.cmp = icmp eq i64 %prol.iter88.sub, 0
+  br i1 %prol.iter88.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !25
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.preheader, %vector.body.prol.loopexit.unr-lcssa
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ]
+  %17 = icmp ult i64 %10, 56
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ]
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %19, align 4, !tbaa !17
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %21, align 4, !tbaa !17
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %23, align 4, !tbaa !17
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %25, align 4, !tbaa !17
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %27, align 4, !tbaa !17
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %29, align 4, !tbaa !17
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %31, align 4, !tbaa !17
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %33, align 4, !tbaa !17
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %35, align 4, !tbaa !17
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %37, align 4, !tbaa !17
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %39, align 4, !tbaa !17
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %41, align 4, !tbaa !17
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %43, align 4, !tbaa !17
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %45, align 4, !tbaa !17
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %47, align 4, !tbaa !17
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %49, align 4, !tbaa !17
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !26
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %div35, %n.vec
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader85
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %middle.block, %if.then
+  %div.lcssa = phi i64 [ 0, %if.then ], [ %div35, %middle.block ], [ %div35, %for.cond.cleanup.loopexit ]
+  %conv731 = and i64 %div.lcssa, 4294967295
+  %cmp932 = icmp ult i64 %conv731, %4
+  br i1 %cmp932, label %for.body11.lr.ph, label %if.end
+
+for.body11.lr.ph:                                 ; preds = %for.cond.cleanup
+  %conv5 = trunc i64 %div.lcssa to i32
+  %51 = add nuw i64 %div.lcssa, 1
+  %52 = and i64 %51, 4294967295
+  %53 = icmp ugt i64 %4, %52
+  %umax = select i1 %53, i64 %4, i64 %52
+  %54 = add i64 %umax, 1
+  %55 = sub i64 %54, %52
+  %min.iters.check52 = icmp ult i64 %55, 8
+  br i1 %min.iters.check52, label %for.body11.preheader, label %min.iters.checked53
+
+for.body11.preheader:                             ; preds = %middle.block50, %vector.scevcheck65, %min.iters.checked53, %for.body11.lr.ph
+  %conv734.ph = phi i64 [ %conv731, %vector.scevcheck65 ], [ %conv731, %min.iters.checked53 ], [ %conv731, %for.body11.lr.ph ], [ %ind.end70, %middle.block50 ]
+  %i2.033.ph = phi i32 [ %conv5, %vector.scevcheck65 ], [ %conv5, %min.iters.checked53 ], [ %conv5, %for.body11.lr.ph ], [ %ind.end73, %middle.block50 ]
+  br label %for.body11
+
+min.iters.checked53:                              ; preds = %for.body11.lr.ph
+  %n.vec55 = and i64 %55, -8
+  %cmp.zero56 = icmp eq i64 %n.vec55, 0
+  br i1 %cmp.zero56, label %for.body11.preheader, label %vector.scevcheck65
+
+vector.scevcheck65:                               ; preds = %min.iters.checked53
+  %56 = add i32 %conv5, 1
+  %57 = zext i32 %56 to i64
+  %58 = icmp ugt i64 %4, %57
+  %umax58 = select i1 %58, i64 %4, i64 %57
+  %59 = sub i64 %umax58, %57
+  %60 = trunc i64 %59 to i32
+  %61 = add i32 %56, %60
+  %62 = icmp ult i32 %61, %56
+  %63 = icmp ugt i64 %59, 4294967295
+  %64 = or i1 %62, %63
+  %65 = trunc i64 %59 to i32
+  %66 = add i32 %conv5, %65
+  %67 = icmp ult i32 %66, %conv5
+  %68 = icmp ugt i64 %59, 4294967295
+  %69 = or i1 %67, %68
+  %70 = or i1 %64, %69
+  %ind.end70 = add i64 %conv731, %n.vec55
+  %cast.crd72 = trunc i64 %n.vec55 to i32
+  %ind.end73 = add i32 %conv5, %cast.crd72
+  br i1 %70, label %for.body11.preheader, label %vector.body49.preheader
+
+vector.body49.preheader:                          ; preds = %vector.scevcheck65
+  %71 = add i64 %n.vec55, -8
+  %72 = lshr exact i64 %71, 3
+  %73 = add nuw nsw i64 %72, 1
+  %xtraiter = and i64 %73, 3
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body49.prol.loopexit, label %vector.body49.prol.preheader
+
+vector.body49.prol.preheader:                     ; preds = %vector.body49.preheader
+  br label %vector.body49.prol
+
+vector.body49.prol:                               ; preds = %vector.body49.prol, %vector.body49.prol.preheader
+  %index67.prol = phi i64 [ %index.next68.prol, %vector.body49.prol ], [ 0, %vector.body49.prol.preheader ]
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body49.prol ], [ %xtraiter, %vector.body49.prol.preheader ]
+  %74 = add i64 %conv731, %index67.prol
+  %75 = getelementptr inbounds float, float* %2, i64 %74
+  %76 = bitcast float* %75 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %76, align 4, !tbaa !17
+  %77 = getelementptr float, float* %75, i64 4
+  %78 = bitcast float* %77 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %78, align 4, !tbaa !17
+  %index.next68.prol = add i64 %index67.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body49.prol.loopexit.unr-lcssa, label %vector.body49.prol, !llvm.loop !27
+
+vector.body49.prol.loopexit.unr-lcssa:            ; preds = %vector.body49.prol
+  br label %vector.body49.prol.loopexit
+
+vector.body49.prol.loopexit:                      ; preds = %vector.body49.preheader, %vector.body49.prol.loopexit.unr-lcssa
+  %index67.unr = phi i64 [ 0, %vector.body49.preheader ], [ %index.next68.prol, %vector.body49.prol.loopexit.unr-lcssa ]
+  %79 = icmp ult i64 %71, 24
+  br i1 %79, label %middle.block50, label %vector.body49.preheader.new
+
+vector.body49.preheader.new:                      ; preds = %vector.body49.prol.loopexit
+  br label %vector.body49
+
+vector.body49:                                    ; preds = %vector.body49, %vector.body49.preheader.new
+  %index67 = phi i64 [ %index67.unr, %vector.body49.preheader.new ], [ %index.next68.3, %vector.body49 ]
+  %80 = add i64 %conv731, %index67
+  %81 = getelementptr inbounds float, float* %2, i64 %80
+  %82 = bitcast float* %81 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %82, align 4, !tbaa !17
+  %83 = getelementptr float, float* %81, i64 4
+  %84 = bitcast float* %83 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %84, align 4, !tbaa !17
+  %index.next68 = add i64 %index67, 8
+  %85 = add i64 %conv731, %index.next68
+  %86 = getelementptr inbounds float, float* %2, i64 %85
+  %87 = bitcast float* %86 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %87, align 4, !tbaa !17
+  %88 = getelementptr float, float* %86, i64 4
+  %89 = bitcast float* %88 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %89, align 4, !tbaa !17
+  %index.next68.1 = add i64 %index67, 16
+  %90 = add i64 %conv731, %index.next68.1
+  %91 = getelementptr inbounds float, float* %2, i64 %90
+  %92 = bitcast float* %91 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %92, align 4, !tbaa !17
+  %93 = getelementptr float, float* %91, i64 4
+  %94 = bitcast float* %93 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %94, align 4, !tbaa !17
+  %index.next68.2 = add i64 %index67, 24
+  %95 = add i64 %conv731, %index.next68.2
+  %96 = getelementptr inbounds float, float* %2, i64 %95
+  %97 = bitcast float* %96 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %97, align 4, !tbaa !17
+  %98 = getelementptr float, float* %96, i64 4
+  %99 = bitcast float* %98 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %99, align 4, !tbaa !17
+  %index.next68.3 = add i64 %index67, 32
+  %100 = icmp eq i64 %index.next68.3, %n.vec55
+  br i1 %100, label %middle.block50.unr-lcssa, label %vector.body49, !llvm.loop !28
+
+middle.block50.unr-lcssa:                         ; preds = %vector.body49
+  br label %middle.block50
+
+middle.block50:                                   ; preds = %vector.body49.prol.loopexit, %middle.block50.unr-lcssa
+  %cmp.n74 = icmp eq i64 %55, %n.vec55
+  br i1 %cmp.n74, label %if.end, label %for.body11.preheader
+
+for.body:                                         ; preds = %for.body.preheader85, %for.body
+  %conv38 = phi i64 [ %conv, %for.body ], [ %conv38.ph, %for.body.preheader85 ]
+  %i.037 = phi i32 [ %inc, %for.body ], [ %i.037.ph, %for.body.preheader85 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv38
+  store float 1.000000e+00, float* %arrayidx, align 4, !tbaa !17
+  %inc = add i32 %i.037, 1
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %div35
+  br i1 %cmp1, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !29
+
+for.body11:                                       ; preds = %for.body11.preheader, %for.body11
+  %conv734 = phi i64 [ %conv7, %for.body11 ], [ %conv734.ph, %for.body11.preheader ]
+  %i2.033 = phi i32 [ %inc15, %for.body11 ], [ %i2.033.ph, %for.body11.preheader ]
+  %arrayidx13 = getelementptr inbounds float, float* %2, i64 %conv734
+  store float 2.000000e+00, float* %arrayidx13, align 4, !tbaa !17
+  %inc15 = add i32 %i2.033, 1
+  %conv7 = zext i32 %inc15 to i64
+  %cmp9 = icmp ult i64 %conv7, %4
+  br i1 %cmp9, label %for.body11, label %if.end.loopexit, !llvm.loop !30
+
+if.end.loopexit:                                  ; preds = %for.body11
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block50, %for.cond.cleanup, %entry
+  ret void
+}
+
+; fillTensorWithNegOnes(void* tensor_ptr)
+; Calls hpvm_request_tensor(tensor_ptr, 0) and then, only when the i32 at
+; byte offset 0 of the tensor (%data_type) equals 0, stores -1.0f into each
+; of the %num_elems floats (i64 count at byte offset 48) reached through the
+; float* held at byte offset 32 (%host_data). Returns without writing when
+; %data_type is non-zero or the count is 0.
+; NOTE(review): data_type == 0 presumably denotes a float tensor and the
+; second argument of hpvm_request_tensor presumably selects the host copy;
+; confirm both against the tensor runtime headers.
+; The block names (min.iters.checked, vector.body, *.prol, *.unr-lcssa)
+; suggest the fill loop was vectorized and unrolled by the optimizer; a
+; scalar loop (%for.body) handles short counts and the remainder.
+; Function Attrs: nounwind uwtable
+define void @_Z21fillTensorWithNegOnesPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !16
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; Load the data pointer (offset 32) and element count (offset 48); an empty
+; tensor goes straight to the return.
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !13
+  %cmp110 = icmp eq i64 %4, 0
+  br i1 %cmp110, label %if.end, label %for.body.preheader
+
+; Fewer than 8 elements: use the scalar loop only.
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 8
+  br i1 %min.iters.check, label %for.body.preheader22, label %min.iters.checked
+
+; %n.vec = element count rounded down to a multiple of 8.
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %4, -8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader22, label %vector.scevcheck
+
+; Take the scalar path when (count - 1) truncates to 0xFFFFFFFF or exceeds
+; 4294967295, i.e. when it does not fit safely in the 32-bit counter.
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add i64 %4, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader22, label %vector.body.preheader
+
+; %12 = number of 8-element vector iterations; %xtraiter = %12 mod 8 is the
+; number of iterations run by the prologue loop before the 8x-unrolled body.
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1
+  %xtraiter = and i64 %12, 7
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+; Prologue: one 8-float store pair per trip, %xtraiter trips.
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body.prol ], [ %xtraiter, %vector.body.prol.preheader ]
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %14, align 4, !tbaa !17
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %16, align 4, !tbaa !17
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !31
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+; Skip the unrolled body when there are fewer than 8 vector iterations
+; in total (%10 < 56).
+vector.body.prol.loopexit:                        ; preds = %vector.body.preheader, %vector.body.prol.loopexit.unr-lcssa
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ]
+  %17 = icmp ult i64 %10, 56
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+; Main body: 8 copies of the 8-float store pair, advancing %index by 64.
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ]
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %19, align 4, !tbaa !17
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %21, align 4, !tbaa !17
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %23, align 4, !tbaa !17
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %25, align 4, !tbaa !17
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %27, align 4, !tbaa !17
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %29, align 4, !tbaa !17
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %31, align 4, !tbaa !17
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %33, align 4, !tbaa !17
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %35, align 4, !tbaa !17
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %37, align 4, !tbaa !17
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %39, align 4, !tbaa !17
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %41, align 4, !tbaa !17
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %43, align 4, !tbaa !17
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %45, align 4, !tbaa !17
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %47, align 4, !tbaa !17
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %49, align 4, !tbaa !17
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !32
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+; Done if the count was an exact multiple of 8; otherwise finish the tail
+; in the scalar loop starting at index %n.vec.
+middle.block:                                     ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %4, %n.vec
+  br i1 %cmp.n, label %if.end, label %for.body.preheader22
+
+for.body.preheader22:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv12.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %i.011.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+; Scalar loop: one float per trip. The counter %i.011 is 32-bit and is
+; zero-extended both for indexing and for the comparison with the count.
+for.body:                                         ; preds = %for.body.preheader22, %for.body
+  %conv12 = phi i64 [ %conv, %for.body ], [ %conv12.ph, %for.body.preheader22 ]
+  %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader22 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv12
+  store float -1.000000e+00, float* %arrayidx, align 4, !tbaa !17
+  %inc = add i32 %i.011, 1
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %4
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !33
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; fillTensorVals(void* tensor_ptr)
+; When the i32 at byte offset 0 of the tensor (%data_type) equals 0, writes
+; the sequence 1.0, 2.0, 3.0, ... into the floats reached through the float*
+; at byte offset 32 (%host_data): element k (0-based) receives float(k + 1),
+; for k below the i64 count at byte offset 48 (%num_elems). Unlike the other
+; fill/print helpers visible here, this function does not call
+; hpvm_request_tensor before touching the data.
+; NOTE(review): data_type == 0 presumably denotes a float tensor; confirm
+; against the tensor runtime headers.
+; The value written is derived from a 32-bit counter converted with uitofp.
+; Function Attrs: norecurse nounwind uwtable
+define void @_Z14fillTensorValsPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #6 {
+entry:
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !16
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; Load the data pointer (offset 32) and element count (offset 48); an empty
+; tensor goes straight to the return.
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !13
+  %cmp111 = icmp eq i64 %4, 0
+  br i1 %cmp111, label %if.end, label %for.body.preheader
+
+; Fewer than 8 elements: use the scalar loop only.
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 8
+  br i1 %min.iters.check, label %for.body.preheader23, label %min.iters.checked
+
+; %n.vec = element count rounded down to a multiple of 8.
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %4, -8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader23, label %vector.scevcheck
+
+; Take the scalar path when (count - 1) truncates to 0xFFFFFFFF or exceeds
+; 4294967295, i.e. when it does not fit safely in the 32-bit counter.
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add i64 %4, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader23, label %vector.body.preheader
+
+; %11 = (number of 8-element vector iterations) - 1. When %11 is even the
+; iteration count is odd, so one iteration is peeled into %vector.body.prol
+; before the 2x-unrolled body.
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = and i64 %11, 1
+  %lcmp.mod = icmp eq i64 %12, 0
+  br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+; Peeled first iteration: elements 0..7 receive the constants 1.0..8.0.
+vector.body.prol:                                 ; preds = %vector.body.prol.preheader
+  %13 = bitcast float* %2 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, <4 x float>* %13, align 4, !tbaa !17
+  %14 = getelementptr float, float* %2, i64 4
+  %15 = bitcast float* %14 to <4 x float>*
+  store <4 x float> <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>, <4 x float>* %15, align 4, !tbaa !17
+  br label %vector.body.prol.loopexit
+
+; %11 == 0 means there was exactly one vector iteration, already handled by
+; the peeled block, so the unrolled body is skipped.
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ 8, %vector.body.prol ]
+  %16 = icmp eq i64 %11, 0
+  br i1 %16, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+; Main body, unrolled 2x: each half splats the truncated index across a
+; <4 x i32>, adds <1..4> and <5..8>, converts to float and stores 8 values.
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+  %17 = trunc i64 %index to i32
+  %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %17, i32 0
+  %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer
+  %18 = add <4 x i32> %broadcast.splat20, <i32 1, i32 2, i32 3, i32 4>
+  %19 = add <4 x i32> %broadcast.splat20, <i32 5, i32 6, i32 7, i32 8>
+  %20 = uitofp <4 x i32> %18 to <4 x float>
+  %21 = uitofp <4 x i32> %19 to <4 x float>
+  %22 = getelementptr inbounds float, float* %2, i64 %index
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> %20, <4 x float>* %23, align 4, !tbaa !17
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> %21, <4 x float>* %25, align 4, !tbaa !17
+  %index.next = add i64 %index, 8
+  %26 = trunc i64 %index.next to i32
+  %broadcast.splatinsert19.1 = insertelement <4 x i32> undef, i32 %26, i32 0
+  %broadcast.splat20.1 = shufflevector <4 x i32> %broadcast.splatinsert19.1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %27 = add <4 x i32> %broadcast.splat20.1, <i32 1, i32 2, i32 3, i32 4>
+  %28 = add <4 x i32> %broadcast.splat20.1, <i32 5, i32 6, i32 7, i32 8>
+  %29 = uitofp <4 x i32> %27 to <4 x float>
+  %30 = uitofp <4 x i32> %28 to <4 x float>
+  %31 = getelementptr inbounds float, float* %2, i64 %index.next
+  %32 = bitcast float* %31 to <4 x float>*
+  store <4 x float> %29, <4 x float>* %32, align 4, !tbaa !17
+  %33 = getelementptr float, float* %31, i64 4
+  %34 = bitcast float* %33 to <4 x float>*
+  store <4 x float> %30, <4 x float>* %34, align 4, !tbaa !17
+  %index.next.1 = add i64 %index, 16
+  %35 = icmp eq i64 %index.next.1, %n.vec
+  br i1 %35, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !34
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+; Done if the count was an exact multiple of 8; otherwise finish the tail
+; in the scalar loop starting at index %n.vec.
+middle.block:                                     ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %4, %n.vec
+  br i1 %cmp.n, label %if.end, label %for.body.preheader23
+
+for.body.preheader23:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv13.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %i.012.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+; Scalar loop: element at index %conv13 (the zero-extended counter) receives
+; float(counter + 1); the incremented counter is reused as the next index.
+for.body:                                         ; preds = %for.body.preheader23, %for.body
+  %conv13 = phi i64 [ %conv, %for.body ], [ %conv13.ph, %for.body.preheader23 ]
+  %i.012 = phi i32 [ %add, %for.body ], [ %i.012.ph, %for.body.preheader23 ]
+  %add = add i32 %i.012, 1
+  %conv2 = uitofp i32 %add to float
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv13
+  store float %conv2, float* %arrayidx, align 4, !tbaa !17
+  %conv = zext i32 %add to i64
+  %cmp1 = icmp ult i64 %conv, %4
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !35
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; printTensorValues(void* tensor_ptr)
+; Calls hpvm_request_tensor(tensor_ptr, 0). When the i32 at byte offset 0
+; of the tensor (%data_type) equals 0, walks the floats reached through the
+; float* at byte offset 32 (%host_data) and passes each one, widened to
+; double, to printf with the format string @.str.9. A putchar(10) (ASCII
+; line feed) is issued on every path, including when %data_type is non-zero
+; or the element count is 0.
+; NOTE(review): data_type == 0 presumably denotes a float tensor; confirm
+; against the tensor runtime headers.
+; Function Attrs: nounwind uwtable
+define void @_Z17printTensorValuesPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !16
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; Load the data pointer (offset 32) and the element count (offset 48).
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !13
+  %cmp112 = icmp eq i64 %4, 0
+  br i1 %cmp112, label %if.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.then
+  br label %for.body
+
+; One printf per element. The element count is re-loaded from the tensor
+; (%6) on every trip rather than reusing %4.
+; NOTE(review): the counter %i.013 is 32-bit; zext(%inc) can never reach a
+; count above 4294967295, so such a count would never terminate this loop.
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %conv14 = phi i64 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+  %i.013 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv14
+  %5 = load float, float* %arrayidx, align 4, !tbaa !17
+  %conv2 = fpext float %5 to double
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.9, i64 0, i64 0), double %conv2)
+  %inc = add i32 %i.013, 1
+  %conv = zext i32 %inc to i64
+  %6 = load i64, i64* %3, align 8, !tbaa !13
+  %cmp1 = icmp ult i64 %conv, %6
+  br i1 %cmp1, label %for.body, label %if.end.loopexit
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+; Trailing line feed, emitted unconditionally.
+if.end:                                           ; preds = %if.end.loopexit, %if.then, %entry
+  %putchar = tail call i32 @putchar(i32 10) #2
+  ret void
+}
+
+; printTensorDims(void* tensor_ptr)
+; Prints the i64 at byte offset 48 of the tensor (%num_elems) via printf
+; with format @.str.11, then, for each index below the i32 at byte offset 64
+; (%num_dims), prints the index and the corresponding i64 entry of the array
+; pointed to by the i64* at byte offset 72 (%dim_sizes) via printf with
+; format @.str.12. Nothing more is printed when %num_dims is <= 0 (signed
+; comparison). This function does not call hpvm_request_tensor.
+; Function Attrs: nounwind uwtable
+define void @_Z15printTensorDimsPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %0 = bitcast i8* %num_elems to i64*
+  %1 = load i64, i64* %0, align 8, !tbaa !13
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.11, i64 0, i64 0), i64 %1)
+  %dims = getelementptr inbounds i8, i8* %tensor_ptr, i64 64
+  %num_dims = bitcast i8* %dims to i32*
+  %2 = load i32, i32* %num_dims, align 8, !tbaa !10
+  %cmp10 = icmp sgt i32 %2, 0
+  br i1 %cmp10, label %for.body.lr.ph, label %for.cond.cleanup
+
+; Address of the dim_sizes pointer field, computed once before the loop.
+for.body.lr.ph:                                   ; preds = %entry
+  %dim_sizes = getelementptr inbounds i8, i8* %tensor_ptr, i64 72
+  %3 = bitcast i8* %dim_sizes to i64**
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+; One printf per dimension. Both the dim_sizes pointer (%4) and num_dims
+; (%7) are re-loaded from the tensor on every trip; the 64-bit induction
+; variable is truncated to i32 only for the printf argument.
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %4 = load i64*, i64** %3, align 8, !tbaa !11
+  %arrayidx = getelementptr inbounds i64, i64* %4, i64 %indvars.iv
+  %5 = load i64, i64* %arrayidx, align 8, !tbaa !12
+  %6 = trunc i64 %indvars.iv to i32
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.12, i64 0, i64 0), i32 %6, i64 %5)
+  %indvars.iv.next = add nuw i64 %indvars.iv, 1
+  %7 = load i32, i32* %num_dims, align 8, !tbaa !10
+  %8 = sext i32 %7 to i64
+  %cmp = icmp slt i64 %indvars.iv.next, %8
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+; compareTensors(void* tensor1_ptr, void* tensor2_ptr)
+; Calls hpvm_request_tensor(ptr, 0) on both tensors, then compares them
+; element by element as floats through the float* each holds at byte offset
+; 32 (%host_data). On the first pair that differs it calls printf with
+; format @.str.13 and the 32-bit element index, then abort(). Returns
+; normally when every compared pair is equal or the count is 0.
+; The number of elements compared is the i64 at byte offset 48 of
+; tensor1_ptr only; no count is read from tensor2_ptr.
+; NOTE(review): nothing here checks that the second tensor holds at least
+; as many elements as the first; verify that callers guarantee it.
+; Function Attrs: nounwind uwtable
+define void @_Z14compareTensorsPvS_(i8* %tensor1_ptr, i8* %tensor2_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor1_ptr, i32 0) #2
+  tail call void @hpvm_request_tensor(i8* %tensor2_ptr, i32 0) #2
+  %host_data = getelementptr inbounds i8, i8* %tensor1_ptr, i64 32
+  %0 = bitcast i8* %host_data to float**
+  %1 = load float*, float** %0, align 8, !tbaa !15
+  %host_data1 = getelementptr inbounds i8, i8* %tensor2_ptr, i64 32
+  %2 = bitcast i8* %host_data1 to float**
+  %3 = load float*, float** %2, align 8, !tbaa !15
+  %num_elems = getelementptr inbounds i8, i8* %tensor1_ptr, i64 48
+  %4 = bitcast i8* %num_elems to i64*
+  %5 = load i64, i64* %4, align 8, !tbaa !13
+  %cmp17 = icmp eq i64 %5, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+; Load one element from each tensor and test for inequality. The comparison
+; carries the `fast` flag, so it is subject to fast-math assumptions.
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %conv19 = phi i64 [ %conv, %for.inc ], [ 0, %for.body.preheader ]
+  %i.018 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %1, i64 %conv19
+  %6 = load float, float* %arrayidx, align 4, !tbaa !17
+  %arrayidx3 = getelementptr inbounds float, float* %3, i64 %conv19
+  %7 = load float, float* %arrayidx3, align 4, !tbaa !17
+  %cmp4 = fcmp fast une float %6, %7
+  br i1 %cmp4, label %if.then, label %for.inc
+
+; Mismatch: report the index and terminate the process.
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.13, i64 0, i64 0), i32 %i.018)
+  tail call void @abort() #8
+  unreachable
+
+; NOTE(review): the counter is 32-bit; zext(%inc) can never reach a count
+; above 4294967295, so such a count would never terminate this loop.
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.018, 1
+  %conv = zext i32 %inc to i64
+  %cmp = icmp ult i64 %conv, %5
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+; compareValues(void* tensor_ptr, float* data, size_t num_elems)
+; Calls hpvm_request_tensor(tensor_ptr, 0), then compares the first
+; %num_elems floats reached through the float* at byte offset 32 of the
+; tensor (%host_data) against data[0 .. num_elems). On the first pair that
+; differs it calls printf with format @.str.14 (no further arguments) and
+; then abort(). Returns normally when all pairs are equal or %num_elems is 0.
+; The tensor's own element count and data type fields are not read here.
+; Function Attrs: nounwind uwtable
+define void @_Z13compareValuesPvPfm(i8* %tensor_ptr, float* nocapture readonly %data, i64 %num_elems) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %0 = bitcast i8* %host_data to float**
+  %1 = load float*, float** %0, align 8, !tbaa !15
+  %cmp11 = icmp eq i64 %num_elems, 0
+  br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+; Loop latch: continue while the zero-extended 32-bit counter is below
+; %num_elems.
+; NOTE(review): zext(%inc) can never reach a count above 4294967295, so
+; such a count would never terminate this loop.
+for.cond:                                         ; preds = %for.body
+  %conv = zext i32 %inc to i64
+  %cmp = icmp ult i64 %conv, %num_elems
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+; Load one element from the tensor and one from %data and test for
+; inequality (fast-math flagged). The counter increment is computed here,
+; ahead of the branch, and consumed in %for.cond.
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %conv13 = phi i64 [ %conv, %for.cond ], [ 0, %for.body.preheader ]
+  %i.012 = phi i32 [ %inc, %for.cond ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %1, i64 %conv13
+  %2 = load float, float* %arrayidx, align 4, !tbaa !17
+  %arrayidx2 = getelementptr inbounds float, float* %data, i64 %conv13
+  %3 = load float, float* %arrayidx2, align 4, !tbaa !17
+  %cmp3 = fcmp fast une float %2, %3
+  %inc = add i32 %i.012, 1
+  br i1 %cmp3, label %if.then, label %for.cond
+
+; Mismatch: print the fixed message and terminate the process.
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0))
+  tail call void @abort() #8
+  unreachable
+}
+
+; Function Attrs: nounwind uwtable -- readInputTensor(const char*, int, int, int, int, int): reads one byte per element from the file, converts each byte to a scaled float, loads the floats into a tensor obtained from create4DTensor and returns that tensor.
+define i8* @_Z15readInputTensorPKciiiii(i8* %file_name, i32 %data_type, i32 %dim1_size, i32 %dim2_size, i32 %dim3_size, i32 %dim4_size) local_unnamed_addr #3 {
+entry:
+  %mul = mul nsw i32 %dim2_size, %dim1_size
+  %mul1 = mul nsw i32 %mul, %dim3_size
+  %mul2 = mul nsw i32 %mul1, %dim4_size ; element count = dim1*dim2*dim3*dim4, computed in 32-bit arithmetic
+  %mul3 = shl i32 %dim1_size, 2
+  %mul4 = mul nsw i32 %mul3, %dim2_size
+  %mul5 = mul nsw i32 %mul4, %dim3_size
+  %mul6 = mul nsw i32 %mul5, %dim4_size ; 4 * element count, also in 32-bit arithmetic; passed to initTensorData below
+  %conv = sext i32 %mul2 to i64
+  %call = tail call noalias i8* @malloc(i64 %conv) #2 ; staging buffer, one byte per element; NOTE(review): result is not null-checked
+  %mul9 = shl nsw i64 %conv, 2
+  %call10 = tail call noalias i8* @malloc(i64 %mul9) #2 ; float buffer, four bytes per element; NOTE(review): result is not null-checked
+  %0 = bitcast i8* %call10 to float*
+  %call11 = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0)) ; mode string is @.str.15 (3 bytes, defined elsewhere in the file)
+  %cmp = icmp eq %struct._IO_FILE* %call11, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call12 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.16, i64 0, i64 0), i8* %file_name) ; open failed: print @.str.16 with the file name
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %call14 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call11, i64 16, i32 1) ; move 16 bytes with whence = 1 (presumably SEEK_CUR, i.e. skip a 16-byte header -- confirm); result unused
+  %call17 = tail call i64 @fread(i8* %call, i64 1, i64 %conv, %struct._IO_FILE* nonnull %call11) ; read one byte per element into the staging buffer; NOTE(review): the returned count is never checked
+  %cmp1966 = icmp eq i32 %mul2, 0 ; zero elements: skip the conversion loop (this flag is reused further down)
+  br i1 %cmp1966, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.end
+  %1 = icmp ugt i64 %conv, 1
+  %umax = select i1 %1, i64 %conv, i64 1 ; loop trip count = unsigned max(%conv, 1)
+  %min.iters.check = icmp ult i64 %umax, 8 ; fewer than 8 iterations: use only the scalar loop
+  br i1 %min.iters.check, label %for.body.preheader72, label %min.iters.checked
+
+for.body.preheader72:                             ; preds = %middle.block, %min.iters.checked, %for.body.preheader
+  %i.067.ph = phi i64 [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] ; index at which the scalar loop starts
+  br label %for.body
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %umax, -8 ; trip count rounded down to a multiple of 8 (elements handled by the vector code)
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader72, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %min.iters.checked
+  %2 = add nsw i64 %n.vec, -8
+  %3 = lshr exact i64 %2, 3 ; number of 8-element steps minus one
+  %4 = and i64 %3, 1
+  %lcmp.mod = icmp eq i64 %4, 0 ; true when the number of 8-element steps is odd
+  br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol.preheader -- peeled single 8-element step, so the main vector loop can run two steps per iteration
+  %5 = bitcast i8* %call to <4 x i8>*
+  %wide.load.prol = load <4 x i8>, <4 x i8>* %5, align 1, !tbaa !36
+  %6 = getelementptr i8, i8* %call, i64 4
+  %7 = bitcast i8* %6 to <4 x i8>*
+  %wide.load71.prol = load <4 x i8>, <4 x i8>* %7, align 1, !tbaa !36
+  %8 = uitofp <4 x i8> %wide.load.prol to <4 x float> ; bytes are converted as unsigned values
+  %9 = uitofp <4 x i8> %wide.load71.prol to <4 x float>
+  %10 = fmul fast <4 x float> %8, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000> ; scale factor 0x3F70101020000000 is approximately 1/255
+  %11 = fmul fast <4 x float> %9, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %12 = bitcast i8* %call10 to <4 x float>*
+  store <4 x float> %10, <4 x float>* %12, align 4, !tbaa !17
+  %13 = getelementptr i8, i8* %call10, i64 16
+  %14 = bitcast i8* %13 to <4 x float>*
+  store <4 x float> %11, <4 x float>* %14, align 4, !tbaa !17
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ 8, %vector.body.prol ] ; first index for the main vector loop
+  %15 = icmp eq i64 %3, 0 ; exactly one 8-element step in total: the peeled step above already covered it
+  br i1 %15, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new -- 16 elements per iteration (two 8-element steps)
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+  %16 = getelementptr inbounds i8, i8* %call, i64 %index
+  %17 = bitcast i8* %16 to <4 x i8>*
+  %wide.load = load <4 x i8>, <4 x i8>* %17, align 1, !tbaa !36
+  %18 = getelementptr i8, i8* %16, i64 4
+  %19 = bitcast i8* %18 to <4 x i8>*
+  %wide.load71 = load <4 x i8>, <4 x i8>* %19, align 1, !tbaa !36
+  %20 = uitofp <4 x i8> %wide.load to <4 x float>
+  %21 = uitofp <4 x i8> %wide.load71 to <4 x float>
+  %22 = fmul fast <4 x float> %20, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %23 = fmul fast <4 x float> %21, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %24 = getelementptr inbounds float, float* %0, i64 %index
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> %22, <4 x float>* %25, align 4, !tbaa !17
+  %26 = getelementptr float, float* %24, i64 4
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> %23, <4 x float>* %27, align 4, !tbaa !17
+  %index.next = add i64 %index, 8 ; second 8-element step of this iteration starts here
+  %28 = getelementptr inbounds i8, i8* %call, i64 %index.next
+  %29 = bitcast i8* %28 to <4 x i8>*
+  %wide.load.1 = load <4 x i8>, <4 x i8>* %29, align 1, !tbaa !36
+  %30 = getelementptr i8, i8* %28, i64 4
+  %31 = bitcast i8* %30 to <4 x i8>*
+  %wide.load71.1 = load <4 x i8>, <4 x i8>* %31, align 1, !tbaa !36
+  %32 = uitofp <4 x i8> %wide.load.1 to <4 x float>
+  %33 = uitofp <4 x i8> %wide.load71.1 to <4 x float>
+  %34 = fmul fast <4 x float> %32, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %35 = fmul fast <4 x float> %33, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %36 = getelementptr inbounds float, float* %0, i64 %index.next
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> %34, <4 x float>* %37, align 4, !tbaa !17
+  %38 = getelementptr float, float* %36, i64 4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> %35, <4 x float>* %39, align 4, !tbaa !17
+  %index.next.1 = add i64 %index, 16
+  %40 = icmp eq i64 %index.next.1, %n.vec
+  br i1 %40, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !37
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %umax, %n.vec ; no leftover elements: skip the scalar remainder loop
+  br i1 %cmp.n, label %for.cond.cleanup.loopexit, label %for.body.preheader72
+
+for.cond.cleanup.loopexit.loopexit:               ; preds = %for.body
+  br label %for.cond.cleanup.loopexit
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup.loopexit.loopexit, %middle.block
+  %arrayidx22.phi.trans.insert = getelementptr inbounds i8, i8* %call10, i64 40 ; byte offset 40 of the float buffer, i.e. float element 10
+  %.phi.trans.insert = bitcast i8* %arrayidx22.phi.trans.insert to float*
+  %.pre = load float, float* %.phi.trans.insert, align 4, !tbaa !17 ; NOTE(review): this fixed-index read lies outside the %mul9-byte allocation when the element count is below 11
+  %phitmp = fpext float %.pre to double
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %if.end, %for.cond.cleanup.loopexit
+  %41 = phi double [ %phitmp, %for.cond.cleanup.loopexit ], [ undef, %if.end ] ; undef on the zero-element path, where the conversion loop was skipped
+  %call24 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.17, i64 0, i64 0), i32 10, double %41) ; prints the constant 10 and the value loaded above using @.str.17
+  %conv25 = sext i32 %dim1_size to i64
+  %conv26 = sext i32 %dim2_size to i64
+  %conv27 = sext i32 %dim3_size to i64
+  %conv28 = sext i32 %dim4_size to i64
+  %call29 = tail call i8* @create4DTensor(i32 %data_type, i32 0, i64 %conv25, i64 %conv26, i64 %conv27, i64 %conv28) #2 ; tensor handle that this function returns
+  %conv30 = sext i32 %mul6 to i64
+  tail call void @initTensorData(i8* %call29, i8* %call10, i64 %conv30) #2 ; hands the float buffer and the 4*count size to the tensor
+  tail call void @hpvm_request_tensor(i8* %call29, i32 0) #2 ; from here to the exit label: same instruction sequence as @_Z13compareValuesPvPfm with .i-suffixed names, comparing the tensor-side floats against the float buffer
+  %host_data.i = getelementptr inbounds i8, i8* %call29, i64 32
+  %42 = bitcast i8* %host_data.i to float**
+  %43 = load float*, float** %42, align 8, !tbaa !15
+  br i1 %cmp1966, label %_Z13compareValuesPvPfm.exit, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %for.cond.cleanup
+  br label %for.body.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  %conv.i = zext i32 %inc.i to i64
+  %cmp.i = icmp ult i64 %conv.i, %conv ; bound is the sign-extended element count
+  br i1 %cmp.i, label %for.body.i, label %_Z13compareValuesPvPfm.exit.loopexit
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.cond.i
+  %conv13.i = phi i64 [ %conv.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %i.012.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %arrayidx.i = getelementptr inbounds float, float* %43, i64 %conv13.i
+  %44 = load float, float* %arrayidx.i, align 4, !tbaa !17
+  %arrayidx2.i = getelementptr inbounds float, float* %0, i64 %conv13.i
+  %45 = load float, float* %arrayidx2.i, align 4, !tbaa !17
+  %cmp3.i = fcmp fast une float %44, %45
+  %inc.i = add i32 %i.012.i, 1
+  br i1 %cmp3.i, label %if.then.i, label %for.cond.i
+
+if.then.i:                                        ; preds = %for.body.i
+  %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0)) #2 ; mismatch: print @.str.14 and abort
+  tail call void @abort() #8
+  unreachable
+
+_Z13compareValuesPvPfm.exit.loopexit:             ; preds = %for.cond.i
+  br label %_Z13compareValuesPvPfm.exit
+
+_Z13compareValuesPvPfm.exit:                      ; preds = %_Z13compareValuesPvPfm.exit.loopexit, %for.cond.cleanup
+  ret i8* %call29 ; NOTE(review): no fclose of %call11 and no free of %call or %call10 appears on any path in this function
+
+for.body:                                         ; preds = %for.body.preheader72, %for.body -- scalar loop for the elements the vector code did not cover
+  %i.067 = phi i64 [ %inc, %for.body ], [ %i.067.ph, %for.body.preheader72 ]
+  %arrayidx = getelementptr inbounds i8, i8* %call, i64 %i.067
+  %46 = load i8, i8* %arrayidx, align 1, !tbaa !36
+  %conv20 = uitofp i8 %46 to float
+  %div = fmul fast float %conv20, 0x3F70101020000000 ; same scale factor as the vector code (approximately 1/255)
+  %arrayidx21 = getelementptr inbounds float, float* %0, i64 %i.067
+  store float %div, float* %arrayidx21, align 4, !tbaa !17
+  %inc = add nuw i64 %i.067, 1
+  %cmp19 = icmp ult i64 %inc, %conv
+  br i1 %cmp19, label %for.body, label %for.cond.cleanup.loopexit.loopexit, !llvm.loop !38
+}
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64) local_unnamed_addr #1 ; declaration only (no body in this module); used by the reader functions for their byte and float buffers
+
+; Function Attrs: nounwind
+declare i32 @fseek(%struct._IO_FILE* nocapture, i64, i32) local_unnamed_addr #1 ; declaration only; every call in view passes 1 as the third argument
+
+; Function Attrs: nounwind
+declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) local_unnamed_addr #1 ; declaration only; every call in view uses an item size of 1
+
+declare i8* @create4DTensor(i32, i32, i64, i64, i64, i64) local_unnamed_addr #0 ; declaration only; callers in view pass (data_type, 0, dim1, dim2, dim3, dim4) and treat the result as the tensor handle
+
+declare void @initTensorData(i8*, i8*, i64) local_unnamed_addr #0 ; declaration only; callers in view pass (tensor handle, source buffer, 4 * element count)
+
+; Function Attrs: nounwind uwtable -- readTrainedWeights(const char*, int, int, int, int, int): reads 4 bytes per element from the file into a buffer, loads that buffer into a tensor obtained from create4DTensor and returns the tensor.
+define %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %file_name, i32 %data_type, i32 %dim1_size, i32 %dim2_size, i32 %dim3_size, i32 %dim4_size) local_unnamed_addr #3 {
+entry:
+  %mul = mul nsw i32 %dim2_size, %dim1_size
+  %mul1 = mul nsw i32 %mul, %dim3_size
+  %mul2 = mul nsw i32 %mul1, %dim4_size ; element count = dim1*dim2*dim3*dim4, computed in 32-bit arithmetic
+  %conv = sext i32 %mul2 to i64
+  %mul7 = shl nsw i64 %conv, 2 ; allocation size: 4 bytes per element, computed in 64-bit arithmetic
+  %call = tail call noalias i8* @malloc(i64 %mul7) #2 ; NOTE(review): result is not null-checked
+  %call8 = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0)) ; mode string is @.str.15 (3 bytes, defined elsewhere in the file)
+  %cmp = icmp eq %struct._IO_FILE* %call8, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call9 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.16, i64 0, i64 0), i8* %file_name) ; open failed: print @.str.16 with the file name
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %0 = bitcast i8* %call to float*
+  %mul3 = shl i32 %dim1_size, 2
+  %mul4 = mul nsw i32 %mul3, %dim2_size
+  %mul5 = mul nsw i32 %mul4, %dim3_size
+  %mul6 = mul nsw i32 %mul5, %dim4_size ; 4 * element count in 32-bit arithmetic; NOTE(review): unlike %mul7 this product is formed before widening, so the two sizes differ if it overflows
+  %call11 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call8, i64 0, i32 1) ; offset 0 with whence = 1 (presumably SEEK_CUR, i.e. no header is skipped -- confirm); result unused
+  %conv12 = sext i32 %mul6 to i64
+  %call13 = tail call i64 @fread(i8* %call, i64 1, i64 %conv12, %struct._IO_FILE* nonnull %call8) ; read %conv12 bytes into the buffer; NOTE(review): the returned count is never checked
+  %conv14 = sext i32 %dim1_size to i64
+  %conv15 = sext i32 %dim2_size to i64
+  %conv16 = sext i32 %dim3_size to i64
+  %conv17 = sext i32 %dim4_size to i64
+  %call18 = tail call i8* @create4DTensor(i32 %data_type, i32 0, i64 %conv14, i64 %conv15, i64 %conv16, i64 %conv17) #2 ; tensor handle that this function returns
+  %1 = bitcast i8* %call18 to %struct.Tensor*
+  tail call void @initTensorData(i8* %call18, i8* %call, i64 %conv12) #2 ; hands the buffer and its byte size to the tensor
+  tail call void @hpvm_request_tensor(i8* %call18, i32 0) #2 ; from here to the exit label: same instruction sequence as @_Z13compareValuesPvPfm with .i-suffixed names, comparing the tensor-side floats against the buffer just read
+  %host_data.i = getelementptr inbounds i8, i8* %call18, i64 32
+  %2 = bitcast i8* %host_data.i to float**
+  %3 = load float*, float** %2, align 8, !tbaa !15
+  %cmp11.i = icmp eq i32 %mul2, 0 ; zero elements: nothing to compare
+  br i1 %cmp11.i, label %_Z13compareValuesPvPfm.exit, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %if.end
+  br label %for.body.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  %conv.i = zext i32 %inc.i to i64
+  %cmp.i = icmp ult i64 %conv.i, %conv ; bound is the sign-extended element count
+  br i1 %cmp.i, label %for.body.i, label %_Z13compareValuesPvPfm.exit.loopexit
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.cond.i
+  %conv13.i = phi i64 [ %conv.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %i.012.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %arrayidx.i = getelementptr inbounds float, float* %3, i64 %conv13.i
+  %4 = load float, float* %arrayidx.i, align 4, !tbaa !17
+  %arrayidx2.i = getelementptr inbounds float, float* %0, i64 %conv13.i
+  %5 = load float, float* %arrayidx2.i, align 4, !tbaa !17
+  %cmp3.i = fcmp fast une float %4, %5
+  %inc.i = add i32 %i.012.i, 1
+  br i1 %cmp3.i, label %if.then.i, label %for.cond.i
+
+if.then.i:                                        ; preds = %for.body.i
+  %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0)) #2 ; mismatch: print @.str.14 and abort
+  tail call void @abort() #8
+  unreachable
+
+_Z13compareValuesPvPfm.exit.loopexit:             ; preds = %for.cond.i
+  br label %_Z13compareValuesPvPfm.exit
+
+_Z13compareValuesPvPfm.exit:                      ; preds = %_Z13compareValuesPvPfm.exit.loopexit, %if.end
+  ret %struct.Tensor* %1 ; NOTE(review): no fclose of %call8 and no free of %call appears on any path in this function
+}
+
+; Function Attrs: nounwind uwtable -- readLabels(char*, int): allocates num_labels bytes, fills them from the labels file and returns the buffer.
+define noalias i8* @_Z10readLabelsPci(i8* %labels_file, i32 %num_labels) local_unnamed_addr #3 {
+entry:
+  %conv = sext i32 %num_labels to i64
+  %call = tail call noalias i8* @malloc(i64 %conv) #2 ; one byte per label; NOTE(review): result is not null-checked
+  %call1 = tail call %struct._IO_FILE* @fopen(i8* %labels_file, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0)) ; mode string is @.str.15 (3 bytes, defined elsewhere in the file)
+  %cmp = icmp eq %struct._IO_FILE* %call1, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.18, i64 0, i64 0), i8* %labels_file) ; open failed: print @.str.18 with the file name
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %call4 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call1, i64 8, i32 1) ; move 8 bytes with whence = 1 (presumably SEEK_CUR, i.e. skip an 8-byte header -- confirm); result unused
+  %call7 = tail call i64 @fread(i8* %call, i64 1, i64 %conv, %struct._IO_FILE* nonnull %call1) ; read up to num_labels bytes into the buffer
+  %call8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.19, i64 0, i64 0), i64 %call7) ; prints the count returned by fread using @.str.19; the count is not otherwise checked
+  ret i8* %call ; caller receives the malloc'd buffer; NOTE(review): no fclose of %call1 appears in this function
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z15computeAccuracyPciPv(i8* %labels_file, i32 %num_labels, i8* nocapture readonly %result_ptr) local_unnamed_addr #3 {
+entry:
+  %ss = alloca %"class.std::__cxx11::basic_ostringstream", align 16
+  %print_str = alloca %"class.std::__cxx11::basic_string", align 8
+  %call = tail call i8* @_Z10readLabelsPci(i8* %labels_file, i32 %num_labels)
+  %dim_sizes = getelementptr inbounds i8, i8* %result_ptr, i64 72
+  %0 = bitcast i8* %dim_sizes to i64**
+  %1 = load i64*, i64** %0, align 8, !tbaa !11
+  %2 = load i64, i64* %1, align 8, !tbaa !12
+  %arrayidx3 = getelementptr inbounds i64, i64* %1, i64 1
+  %3 = load i64, i64* %arrayidx3, align 8, !tbaa !12
+  %host_data = getelementptr inbounds i8, i8* %result_ptr, i64 32
+  %4 = bitcast i8* %host_data to float**
+  %5 = load float*, float** %4, align 8, !tbaa !15
+  %cmp87 = icmp eq i64 %2, 0
+  br i1 %cmp87, label %for.cond.cleanup, label %for.cond4.preheader.preheader
+
+for.cond4.preheader.preheader:                    ; preds = %entry
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.cond4.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %num_errors.089 = phi i32 [ %num_errors.0.inc21, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %mul = mul i64 %indvars.iv, %3
+  %arrayidx10 = getelementptr inbounds float, float* %5, i64 %mul
+  %6 = load float, float* %arrayidx10, align 4, !tbaa !17
+  %add14 = add i64 %mul, 1
+  %arrayidx15 = getelementptr inbounds float, float* %5, i64 %add14
+  %7 = load float, float* %arrayidx15, align 4, !tbaa !17
+  %cmp16 = fcmp fast olt float %6, %7
+  %chosen.1 = zext i1 %cmp16 to i32
+  %conv9.1 = zext i1 %cmp16 to i64
+  %add.1 = add i64 %conv9.1, %mul
+  %arrayidx10.1 = getelementptr inbounds float, float* %5, i64 %add.1
+  %8 = load float, float* %arrayidx10.1, align 4, !tbaa !17
+  %add14.1 = add i64 %mul, 2
+  %arrayidx15.1 = getelementptr inbounds float, float* %5, i64 %add14.1
+  %9 = load float, float* %arrayidx15.1, align 4, !tbaa !17
+  %cmp16.1 = fcmp fast olt float %8, %9
+  %chosen.1.1 = select i1 %cmp16.1, i32 2, i32 %chosen.1
+  %conv9.291 = zext i32 %chosen.1.1 to i64
+  %add.2 = add i64 %conv9.291, %mul
+  %arrayidx10.2 = getelementptr inbounds float, float* %5, i64 %add.2
+  %10 = load float, float* %arrayidx10.2, align 4, !tbaa !17
+  %add14.2 = add i64 %mul, 3
+  %arrayidx15.2 = getelementptr inbounds float, float* %5, i64 %add14.2
+  %11 = load float, float* %arrayidx15.2, align 4, !tbaa !17
+  %cmp16.2 = fcmp fast olt float %10, %11
+  %chosen.1.2 = select i1 %cmp16.2, i32 3, i32 %chosen.1.1
+  %conv9.392 = zext i32 %chosen.1.2 to i64
+  %add.3 = add i64 %conv9.392, %mul
+  %arrayidx10.3 = getelementptr inbounds float, float* %5, i64 %add.3
+  %12 = load float, float* %arrayidx10.3, align 4, !tbaa !17
+  %add14.3 = add i64 %mul, 4
+  %arrayidx15.3 = getelementptr inbounds float, float* %5, i64 %add14.3
+  %13 = load float, float* %arrayidx15.3, align 4, !tbaa !17
+  %cmp16.3 = fcmp fast olt float %12, %13
+  %chosen.1.3 = select i1 %cmp16.3, i32 4, i32 %chosen.1.2
+  %conv9.493 = zext i32 %chosen.1.3 to i64
+  %add.4 = add i64 %conv9.493, %mul
+  %arrayidx10.4 = getelementptr inbounds float, float* %5, i64 %add.4
+  %14 = load float, float* %arrayidx10.4, align 4, !tbaa !17
+  %add14.4 = add i64 %mul, 5
+  %arrayidx15.4 = getelementptr inbounds float, float* %5, i64 %add14.4
+  %15 = load float, float* %arrayidx15.4, align 4, !tbaa !17
+  %cmp16.4 = fcmp fast olt float %14, %15
+  %chosen.1.4 = select i1 %cmp16.4, i32 5, i32 %chosen.1.3
+  %conv9.594 = zext i32 %chosen.1.4 to i64
+  %add.5 = add i64 %conv9.594, %mul
+  %arrayidx10.5 = getelementptr inbounds float, float* %5, i64 %add.5
+  %16 = load float, float* %arrayidx10.5, align 4, !tbaa !17
+  %add14.5 = add i64 %mul, 6
+  %arrayidx15.5 = getelementptr inbounds float, float* %5, i64 %add14.5
+  %17 = load float, float* %arrayidx15.5, align 4, !tbaa !17
+  %cmp16.5 = fcmp fast olt float %16, %17
+  %chosen.1.5 = select i1 %cmp16.5, i32 6, i32 %chosen.1.4
+  %18 = zext i32 %chosen.1.5 to i64
+  %add.6 = add i64 %18, %mul
+  %arrayidx10.6 = getelementptr inbounds float, float* %5, i64 %add.6
+  %19 = load float, float* %arrayidx10.6, align 4, !tbaa !17
+  %add14.6 = add i64 %mul, 7
+  %arrayidx15.6 = getelementptr inbounds float, float* %5, i64 %add14.6
+  %20 = load float, float* %arrayidx15.6, align 4, !tbaa !17
+  %cmp16.6 = fcmp fast olt float %19, %20
+  %chosen.1.6 = select i1 %cmp16.6, i32 7, i32 %chosen.1.5
+  %conv9.7 = sext i32 %chosen.1.6 to i64
+  %add.7 = add i64 %conv9.7, %mul
+  %arrayidx10.7 = getelementptr inbounds float, float* %5, i64 %add.7
+  %21 = load float, float* %arrayidx10.7, align 4, !tbaa !17
+  %add14.7 = add i64 %mul, 8
+  %arrayidx15.7 = getelementptr inbounds float, float* %5, i64 %add14.7
+  %22 = load float, float* %arrayidx15.7, align 4, !tbaa !17
+  %cmp16.7 = fcmp fast olt float %21, %22
+  %chosen.1.7 = select i1 %cmp16.7, i32 8, i32 %chosen.1.6
+  %conv9.8 = sext i32 %chosen.1.7 to i64
+  %add.8 = add i64 %conv9.8, %mul
+  %arrayidx10.8 = getelementptr inbounds float, float* %5, i64 %add.8
+  %23 = load float, float* %arrayidx10.8, align 4, !tbaa !17
+  %add14.8 = add i64 %mul, 9
+  %arrayidx15.8 = getelementptr inbounds float, float* %5, i64 %add14.8
+  %24 = load float, float* %arrayidx15.8, align 4, !tbaa !17
+  %cmp16.8 = fcmp fast olt float %23, %24
+  %chosen.1.8 = select i1 %cmp16.8, i32 9, i32 %chosen.1.7
+  %arrayidx17 = getelementptr inbounds i8, i8* %call, i64 %indvars.iv
+  %25 = load i8, i8* %arrayidx17, align 1, !tbaa !36
+  %conv18 = zext i8 %25 to i32
+  %not.cmp19 = icmp ne i32 %chosen.1.8, %conv18
+  %inc21 = zext i1 %not.cmp19 to i32
+  %num_errors.0.inc21 = add nsw i32 %inc21, %num_errors.089
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %2
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.cond4.preheader
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond4.preheader
+  %phitmp = sext i32 %num_errors.0.inc21 to i64
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %num_errors.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  %sub = sub i64 %2, %num_errors.0.lcssa
+  %conv27 = uitofp i64 %sub to double
+  %conv29 = uitofp i64 %2 to double
+  %div = fdiv fast double %conv27, %conv29
+  %mul31 = fmul fast double %div, 1.000000e+02
+  %conv32 = fptrunc double %mul31 to float
+  %conv33 = fpext float %conv32 to double
+  %call34 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.20, i64 0, i64 0), double %conv33)
+  %call35 = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.21, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.22, i64 0, i64 0))
+  %cmp36 = icmp eq %struct._IO_FILE* %call35, null
+  br i1 %cmp36, label %if.end44, label %if.then37
+
+if.then37:                                        ; preds = %for.cond.cleanup
+  %26 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8*
+  call void @llvm.lifetime.start(i64 376, i8* nonnull %26) #2
+  %27 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2
+  %28 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0
+  call void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"* %28) #2
+  %29 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !40
+  %_M_tie.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 1
+  store %"class.std::basic_ostream"* null, %"class.std::basic_ostream"** %_M_tie.i.i, align 8, !tbaa !42
+  %_M_fill.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 2
+  store i8 0, i8* %_M_fill.i.i, align 16, !tbaa !45
+  %_M_fill_init.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 3
+  store i8 0, i8* %_M_fill_init.i.i, align 1, !tbaa !46
+  %_M_streambuf.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 4
+  %30 = bitcast %"class.std::basic_streambuf"** %_M_streambuf.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %30, i8 0, i64 32, i32 8, i1 false) #2
+  %31 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 1) to i64*), align 8
+  %32 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i64*
+  store i64 %31, i64* %32, align 16, !tbaa !40
+  %33 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 2) to i64*), align 8
+  %34 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8**
+  %vtable.cast.i.i = inttoptr i64 %31 to i8*
+  %vbase.offset.ptr.i.i = getelementptr i8, i8* %vtable.cast.i.i, i64 -24
+  %35 = bitcast i8* %vbase.offset.ptr.i.i to i64*
+  %vbase.offset.i.i = load i64, i64* %35, align 8
+  %add.ptr.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i
+  %36 = bitcast i8* %add.ptr.i.i to i64*
+  store i64 %33, i64* %36, align 8, !tbaa !40
+  %vtable3.i.i = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr4.i.i = getelementptr i8, i8* %vtable3.i.i, i64 -24
+  %37 = bitcast i8* %vbase.offset.ptr4.i.i to i64*
+  %vbase.offset5.i.i = load i64, i64* %37, align 8
+  %add.ptr6.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset5.i.i
+  %38 = bitcast i8* %add.ptr6.i.i to %"class.std::basic_ios"*
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %38, %"class.std::basic_streambuf"* null) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !40
+  %_M_stringbuf.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1
+  %39 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0, i32 0
+  %40 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to <2 x i32 (...)**>*
+  store <2 x i32 (...)**> <i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**)>, <2 x i32 (...)**>* %40, align 16, !tbaa !40
+  %_M_in_beg.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 1
+  %_M_buf_locale.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 7
+  %41 = bitcast i8** %_M_in_beg.i.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %41, i8 0, i64 48, i32 8, i1 false) #2
+  call void @_ZNSt6localeC1Ev(%"class.std::locale"* %_M_buf_locale.i.i.i) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %39, align 8, !tbaa !40
+  %_M_mode.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 1
+  store i32 16, i32* %_M_mode.i.i, align 8, !tbaa !47
+  %_M_string.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2
+  %42 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 2
+  %43 = bitcast %"class.std::__cxx11::basic_string"* %_M_string.i.i to %union.anon**
+  store %union.anon* %42, %union.anon** %43, align 8, !tbaa !52
+  %_M_string_length.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 1
+  store i64 0, i64* %_M_string_length.i.i.i.i.i, align 8, !tbaa !53
+  %.cast.i.i.i = bitcast %union.anon* %42 to i8*
+  store i8 0, i8* %.cast.i.i.i, align 8, !tbaa !36
+  %vtable.i = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr.i = getelementptr i8, i8* %vtable.i, i64 -24
+  %44 = bitcast i8* %vbase.offset.ptr.i to i64*
+  %vbase.offset.i = load i64, i64* %44, align 8
+  %add.ptr2.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i
+  %45 = bitcast i8* %add.ptr2.i to %"class.std::basic_ios"*
+  %46 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %45, %"class.std::basic_streambuf"* %46) #2
+  %47 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to %"class.std::basic_ostream"*
+  %vtable.i74 = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr.i75 = getelementptr i8, i8* %vtable.i74, i64 -24
+  %48 = bitcast i8* %vbase.offset.ptr.i75 to i64*
+  %vbase.offset.i76 = load i64, i64* %48, align 8
+  %add.ptr.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i76
+  %_M_flags.i = getelementptr inbounds i8, i8* %add.ptr.i, i64 24
+  %49 = bitcast i8* %_M_flags.i to i32*
+  %50 = load i32, i32* %49, align 4, !tbaa !54
+  %and.i = and i32 %50, -261
+  %or.i = or i32 %and.i, 4
+  store i32 %or.i, i32* %49, align 4, !tbaa !54
+  %call.i = call dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"* nonnull %47, double %conv33) #2
+  %51 = bitcast %"class.std::__cxx11::basic_string"* %print_str to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %51) #2
+  call void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* nonnull sret %print_str, %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i) #2
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 0, i32 0
+  %52 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %_M_string_length.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 1
+  %53 = load i64, i64* %_M_string_length.i, align 8, !tbaa !53
+  %call42 = call i64 @fwrite(i8* %52, i64 1, i64 %53, %struct._IO_FILE* nonnull %call35)
+  %call43 = call i32 @fclose(%struct._IO_FILE* nonnull %call35)
+  %54 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %55 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 2
+  %arraydecay.i.i.i.i = bitcast %union.anon* %55 to i8*
+  %cmp.i.i.i = icmp eq i8* %54, %arraydecay.i.i.i.i
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %if.then37
+  call void @_ZdlPv(i8* %54) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %if.then37, %if.then.i.i
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %51) #2
+  %56 = load i64, i64* bitcast ([4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE to i64*), align 8
+  store i64 %56, i64* %32, align 16, !tbaa !40
+  %57 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 3) to i64*), align 8
+  %vtable.cast.i.i81 = inttoptr i64 %56 to i8*
+  %vbase.offset.ptr.i.i82 = getelementptr i8, i8* %vtable.cast.i.i81, i64 -24
+  %58 = bitcast i8* %vbase.offset.ptr.i.i82 to i64*
+  %vbase.offset.i.i83 = load i64, i64* %58, align 8
+  %add.ptr.i.i84 = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i83
+  %59 = bitcast i8* %add.ptr.i.i84 to i64*
+  store i64 %57, i64* %59, align 8, !tbaa !40
+  %60 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !40
+  %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 0, i32 0
+  %61 = load i8*, i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !56
+  %cmp.i.i.i.i.i.i = icmp eq i8* %61, %.cast.i.i.i
+  br i1 %cmp.i.i.i.i.i.i, label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i:                                ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  call void @_ZdlPv(i8* %61) #2
+  br label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+
+_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, %if.then.i.i.i.i.i
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !40
+  call void @_ZNSt6localeD1Ev(%"class.std::locale"* nonnull %_M_buf_locale.i.i.i) #2
+  %62 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 0
+  call void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"* %62) #2
+  call void @llvm.lifetime.end(i64 376, i8* nonnull %26) #2
+  br label %if.end44
+
+if.end44:                                         ; preds = %for.cond.cleanup, %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z16computeAccuracy2PhiPv(i8* nocapture readonly %labels, i32 %num_labels, i8* nocapture readonly %result_ptr) local_unnamed_addr #3 {
+entry:                                            ; Purpose: for each row of the float data behind %result_ptr, pick the index of the largest of 10 values, compare it with labels[row], then printf the accuracy percentage and write it to a file. %num_labels is never read in this body.
+  %ss = alloca %"class.std::__cxx11::basic_ostringstream", align 16  ; only used on the file-writing path (if.then36)
+  %print_str = alloca %"class.std::__cxx11::basic_string", align 8  ; receives the formatted text from stringbuf::str()
+  %dim_sizes = getelementptr inbounds i8, i8* %result_ptr, i64 72  ; byte offset 72 of *result_ptr is read as an i64* (presumably a dims array, going by the value name; confirm against the tensor struct)
+  %0 = bitcast i8* %dim_sizes to i64**
+  %1 = load i64*, i64** %0, align 8, !tbaa !11
+  %2 = load i64, i64* %1, align 8, !tbaa !12  ; %2 = element 0 of that array; used as the row-loop trip count
+  %arrayidx3 = getelementptr inbounds i64, i64* %1, i64 1
+  %3 = load i64, i64* %arrayidx3, align 8, !tbaa !12  ; %3 = element 1; used as the per-row stride into the float data
+  %host_data = getelementptr inbounds i8, i8* %result_ptr, i64 32
+  %4 = bitcast i8* %host_data to float**
+  %5 = load float*, float** %4, align 8, !tbaa !15  ; %5 = float* loaded from byte offset 32 of *result_ptr
+  %cmp82 = icmp eq i64 %2, 0
+  br i1 %cmp82, label %for.cond.cleanup, label %for.cond4.preheader.preheader  ; zero rows: skip the loop
+
+for.cond4.preheader.preheader:                    ; preds = %entry
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.cond4.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]  ; row index
+  %num_errors.084 = phi i32 [ %num_errors.0.inc21, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]  ; running mismatch count
+  %mul = mul i64 %indvars.iv, %3  ; index of this row's first float (row * stride)
+  %arrayidx10 = getelementptr inbounds float, float* %5, i64 %mul
+  %6 = load float, float* %arrayidx10, align 4, !tbaa !17
+  %add14 = add i64 %mul, 1
+  %arrayidx15 = getelementptr inbounds float, float* %5, i64 %add14
+  %7 = load float, float* %arrayidx15, align 4, !tbaa !17
+  %cmp16 = fcmp fast olt float %6, %7  ; first of nine straight-line compare steps: strict less-than, so ties keep the earlier index
+  %chosen.1 = zext i1 %cmp16 to i32  ; best index so far: 1 if element 1 is larger, else 0
+  %conv9.1 = zext i1 %cmp16 to i64
+  %add.1 = add i64 %conv9.1, %mul
+  %arrayidx10.1 = getelementptr inbounds float, float* %5, i64 %add.1
+  %8 = load float, float* %arrayidx10.1, align 4, !tbaa !17  ; reload the current best value
+  %add14.1 = add i64 %mul, 2
+  %arrayidx15.1 = getelementptr inbounds float, float* %5, i64 %add14.1
+  %9 = load float, float* %arrayidx15.1, align 4, !tbaa !17  ; candidate at column 2
+  %cmp16.1 = fcmp fast olt float %8, %9
+  %chosen.1.1 = select i1 %cmp16.1, i32 2, i32 %chosen.1  ; switch to 2 only if it is strictly larger than the current best
+  %conv9.286 = zext i32 %chosen.1.1 to i64
+  %add.2 = add i64 %conv9.286, %mul
+  %arrayidx10.2 = getelementptr inbounds float, float* %5, i64 %add.2
+  %10 = load float, float* %arrayidx10.2, align 4, !tbaa !17
+  %add14.2 = add i64 %mul, 3
+  %arrayidx15.2 = getelementptr inbounds float, float* %5, i64 %add14.2
+  %11 = load float, float* %arrayidx15.2, align 4, !tbaa !17
+  %cmp16.2 = fcmp fast olt float %10, %11
+  %chosen.1.2 = select i1 %cmp16.2, i32 3, i32 %chosen.1.1
+  %conv9.387 = zext i32 %chosen.1.2 to i64
+  %add.3 = add i64 %conv9.387, %mul
+  %arrayidx10.3 = getelementptr inbounds float, float* %5, i64 %add.3
+  %12 = load float, float* %arrayidx10.3, align 4, !tbaa !17
+  %add14.3 = add i64 %mul, 4
+  %arrayidx15.3 = getelementptr inbounds float, float* %5, i64 %add14.3
+  %13 = load float, float* %arrayidx15.3, align 4, !tbaa !17
+  %cmp16.3 = fcmp fast olt float %12, %13
+  %chosen.1.3 = select i1 %cmp16.3, i32 4, i32 %chosen.1.2
+  %conv9.488 = zext i32 %chosen.1.3 to i64
+  %add.4 = add i64 %conv9.488, %mul
+  %arrayidx10.4 = getelementptr inbounds float, float* %5, i64 %add.4
+  %14 = load float, float* %arrayidx10.4, align 4, !tbaa !17
+  %add14.4 = add i64 %mul, 5
+  %arrayidx15.4 = getelementptr inbounds float, float* %5, i64 %add14.4
+  %15 = load float, float* %arrayidx15.4, align 4, !tbaa !17
+  %cmp16.4 = fcmp fast olt float %14, %15
+  %chosen.1.4 = select i1 %cmp16.4, i32 5, i32 %chosen.1.3
+  %conv9.589 = zext i32 %chosen.1.4 to i64
+  %add.5 = add i64 %conv9.589, %mul
+  %arrayidx10.5 = getelementptr inbounds float, float* %5, i64 %add.5
+  %16 = load float, float* %arrayidx10.5, align 4, !tbaa !17
+  %add14.5 = add i64 %mul, 6
+  %arrayidx15.5 = getelementptr inbounds float, float* %5, i64 %add14.5
+  %17 = load float, float* %arrayidx15.5, align 4, !tbaa !17
+  %cmp16.5 = fcmp fast olt float %16, %17
+  %chosen.1.5 = select i1 %cmp16.5, i32 6, i32 %chosen.1.4
+  %18 = zext i32 %chosen.1.5 to i64
+  %add.6 = add i64 %18, %mul
+  %arrayidx10.6 = getelementptr inbounds float, float* %5, i64 %add.6
+  %19 = load float, float* %arrayidx10.6, align 4, !tbaa !17
+  %add14.6 = add i64 %mul, 7
+  %arrayidx15.6 = getelementptr inbounds float, float* %5, i64 %add14.6
+  %20 = load float, float* %arrayidx15.6, align 4, !tbaa !17
+  %cmp16.6 = fcmp fast olt float %19, %20
+  %chosen.1.6 = select i1 %cmp16.6, i32 7, i32 %chosen.1.5
+  %conv9.7 = sext i32 %chosen.1.6 to i64
+  %add.7 = add i64 %conv9.7, %mul
+  %arrayidx10.7 = getelementptr inbounds float, float* %5, i64 %add.7
+  %21 = load float, float* %arrayidx10.7, align 4, !tbaa !17
+  %add14.7 = add i64 %mul, 8
+  %arrayidx15.7 = getelementptr inbounds float, float* %5, i64 %add14.7
+  %22 = load float, float* %arrayidx15.7, align 4, !tbaa !17
+  %cmp16.7 = fcmp fast olt float %21, %22
+  %chosen.1.7 = select i1 %cmp16.7, i32 8, i32 %chosen.1.6
+  %conv9.8 = sext i32 %chosen.1.7 to i64
+  %add.8 = add i64 %conv9.8, %mul
+  %arrayidx10.8 = getelementptr inbounds float, float* %5, i64 %add.8
+  %23 = load float, float* %arrayidx10.8, align 4, !tbaa !17
+  %add14.8 = add i64 %mul, 9
+  %arrayidx15.8 = getelementptr inbounds float, float* %5, i64 %add14.8
+  %24 = load float, float* %arrayidx15.8, align 4, !tbaa !17
+  %cmp16.8 = fcmp fast olt float %23, %24
+  %chosen.1.8 = select i1 %cmp16.8, i32 9, i32 %chosen.1.7  ; final best index; columns 0..9 are always read, independent of the stride %3
+  %arrayidx17 = getelementptr inbounds i8, i8* %labels, i64 %indvars.iv
+  %25 = load i8, i8* %arrayidx17, align 1, !tbaa !36  ; label byte for this row
+  %conv18 = zext i8 %25 to i32  ; label is treated as unsigned
+  %not.cmp19 = icmp ne i32 %chosen.1.8, %conv18
+  %inc21 = zext i1 %not.cmp19 to i32
+  %num_errors.0.inc21 = add nsw i32 %inc21, %num_errors.084  ; count a mismatch between best index and label
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %2
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.cond4.preheader
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond4.preheader
+  %phitmp = sext i32 %num_errors.0.inc21 to i64
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %num_errors.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  %sub = sub i64 %2, %num_errors.0.lcssa  ; rows whose best index matched the label
+  %conv27 = uitofp i64 %sub to double
+  %conv29 = uitofp i64 %2 to double
+  %div = fdiv fast double %conv27, %conv29  ; NOTE(review): when %2 is 0 this is 0/0 under fast-math flags
+  %mul31 = fmul fast double %div, 1.000000e+02  ; scale to a percentage
+  %conv32 = fptrunc double %mul31 to float
+  %conv33 = fpext float %conv32 to double  ; value is rounded through float before being printed and written
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.20, i64 0, i64 0), double %conv33)
+  %call34 = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.21, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.22, i64 0, i64 0))  ; path and mode strings are @.str.21 / @.str.22, defined elsewhere in the module
+  %cmp35 = icmp eq %struct._IO_FILE* %call34, null
+  br i1 %cmp35, label %if.end43, label %if.then36  ; fopen returned null: return without writing anything
+
+if.then36:                                        ; preds = %for.cond.cleanup
+  %26 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8*
+  call void @llvm.lifetime.start(i64 376, i8* nonnull %26) #2  ; from here to the first basic_ios::init call: in-place construction of the ostringstream %ss (inlined constructor code)
+  %27 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2
+  %28 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0
+  call void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"* %28) #2
+  %29 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !40
+  %_M_tie.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 1
+  store %"class.std::basic_ostream"* null, %"class.std::basic_ostream"** %_M_tie.i.i, align 8, !tbaa !42
+  %_M_fill.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 2
+  store i8 0, i8* %_M_fill.i.i, align 16, !tbaa !45
+  %_M_fill_init.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 3
+  store i8 0, i8* %_M_fill_init.i.i, align 1, !tbaa !46
+  %_M_streambuf.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 4
+  %30 = bitcast %"class.std::basic_streambuf"** %_M_streambuf.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %30, i8 0, i64 32, i32 8, i1 false) #2
+  %31 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 1) to i64*), align 8
+  %32 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i64*
+  store i64 %31, i64* %32, align 16, !tbaa !40
+  %33 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 2) to i64*), align 8
+  %34 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8**
+  %vtable.cast.i.i = inttoptr i64 %31 to i8*
+  %vbase.offset.ptr.i.i = getelementptr i8, i8* %vtable.cast.i.i, i64 -24  ; the i64 stored 24 bytes before the vtable address is used below as a byte offset from %ss
+  %35 = bitcast i8* %vbase.offset.ptr.i.i to i64*
+  %vbase.offset.i.i = load i64, i64* %35, align 8
+  %add.ptr.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i
+  %36 = bitcast i8* %add.ptr.i.i to i64*
+  store i64 %33, i64* %36, align 8, !tbaa !40
+  %vtable3.i.i = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr4.i.i = getelementptr i8, i8* %vtable3.i.i, i64 -24
+  %37 = bitcast i8* %vbase.offset.ptr4.i.i to i64*
+  %vbase.offset5.i.i = load i64, i64* %37, align 8
+  %add.ptr6.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset5.i.i
+  %38 = bitcast i8* %add.ptr6.i.i to %"class.std::basic_ios"*
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %38, %"class.std::basic_streambuf"* null) #2  ; first init with a null streambuf; re-initialised with the stringbuf further down
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !40
+  %_M_stringbuf.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1
+  %39 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0, i32 0
+  %40 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to <2 x i32 (...)**>*
+  store <2 x i32 (...)**> <i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**)>, <2 x i32 (...)**>* %40, align 16, !tbaa !40
+  %_M_in_beg.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 1
+  %_M_buf_locale.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 7
+  %41 = bitcast i8** %_M_in_beg.i.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %41, i8 0, i64 48, i32 8, i1 false) #2
+  call void @_ZNSt6localeC1Ev(%"class.std::locale"* %_M_buf_locale.i.i.i) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %39, align 8, !tbaa !40
+  %_M_mode.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 1
+  store i32 16, i32* %_M_mode.i.i, align 8, !tbaa !47
+  %_M_string.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2
+  %42 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 2
+  %43 = bitcast %"class.std::__cxx11::basic_string"* %_M_string.i.i to %union.anon**
+  store %union.anon* %42, %union.anon** %43, align 8, !tbaa !52  ; the stringbuf's string starts out pointing at its own in-object buffer, with length 0
+  %_M_string_length.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 1
+  store i64 0, i64* %_M_string_length.i.i.i.i.i, align 8, !tbaa !53
+  %.cast.i.i.i = bitcast %union.anon* %42 to i8*
+  store i8 0, i8* %.cast.i.i.i, align 8, !tbaa !36
+  %vtable.i = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr.i = getelementptr i8, i8* %vtable.i, i64 -24
+  %44 = bitcast i8* %vbase.offset.ptr.i to i64*
+  %vbase.offset.i = load i64, i64* %44, align 8
+  %add.ptr2.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i
+  %45 = bitcast i8* %add.ptr2.i to %"class.std::basic_ios"*
+  %46 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %45, %"class.std::basic_streambuf"* %46) #2  ; attach the stringbuf to the stream
+  %47 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to %"class.std::basic_ostream"*
+  %vtable.i72 = load i8*, i8** %34, align 16, !tbaa !40
+  %vbase.offset.ptr.i73 = getelementptr i8, i8* %vtable.i72, i64 -24
+  %48 = bitcast i8* %vbase.offset.ptr.i73 to i64*
+  %vbase.offset.i74 = load i64, i64* %48, align 8
+  %add.ptr.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i74
+  %_M_flags.i.i = getelementptr inbounds i8, i8* %add.ptr.i, i64 24
+  %49 = bitcast i8* %_M_flags.i.i to i32*
+  %50 = load i32, i32* %49, align 8, !tbaa !57
+  %and.i.i.i.i = and i32 %50, -261  ; -261 == ~0x104: clears bits 0x100 and 0x4 of the flags word
+  %or.i.i.i.i = or i32 %and.i.i.i.i, 4  ; then sets bit 0x4 (presumably an inlined std::fixed; confirm against libstdc++ fmtflags values)
+  store i32 %or.i.i.i.i, i32* %49, align 4, !tbaa !54
+  %call.i = call dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"* nonnull %47, double %conv33) #2  ; stream-insert the percentage as a double
+  %51 = bitcast %"class.std::__cxx11::basic_string"* %print_str to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %51) #2
+  call void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* nonnull sret %print_str, %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i) #2  ; %print_str = stringbuf contents
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 0, i32 0
+  %52 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %_M_string_length.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 1
+  %53 = load i64, i64* %_M_string_length.i, align 8, !tbaa !53
+  %call41 = call i64 @fwrite(i8* %52, i64 1, i64 %53, %struct._IO_FILE* nonnull %call34)  ; write the string bytes to the opened file; the returned count is not checked
+  %call42 = call i32 @fclose(%struct._IO_FILE* nonnull %call34)  ; fclose result is not checked either
+  %54 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %55 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 2
+  %arraydecay.i.i.i.i = bitcast %union.anon* %55 to i8*
+  %cmp.i.i.i = icmp eq i8* %54, %arraydecay.i.i.i.i
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i  ; %print_str teardown: free only when the data pointer is not the in-object buffer
+
+if.then.i.i:                                      ; preds = %if.then36
+  call void @_ZdlPv(i8* %54) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %if.then36, %if.then.i.i
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %51) #2
+  %56 = load i64, i64* bitcast ([4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE to i64*), align 8  ; from here on: inlined teardown of the ostringstream %ss, starting by rewriting its vtable pointers from the VTT
+  store i64 %56, i64* %32, align 16, !tbaa !40
+  %57 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 3) to i64*), align 8
+  %vtable.cast.i.i76 = inttoptr i64 %56 to i8*
+  %vbase.offset.ptr.i.i77 = getelementptr i8, i8* %vtable.cast.i.i76, i64 -24
+  %58 = bitcast i8* %vbase.offset.ptr.i.i77 to i64*
+  %vbase.offset.i.i78 = load i64, i64* %58, align 8
+  %add.ptr.i.i79 = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i78
+  %59 = bitcast i8* %add.ptr.i.i79 to i64*
+  store i64 %57, i64* %59, align 8, !tbaa !40
+  %60 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !40
+  %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 0, i32 0
+  %61 = load i8*, i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !56
+  %cmp.i.i.i.i.i.i = icmp eq i8* %61, %.cast.i.i.i
+  br i1 %cmp.i.i.i.i.i.i, label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, label %if.then.i.i.i.i.i  ; same in-object-buffer check for the stringbuf's internal string
+
+if.then.i.i.i.i.i:                                ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  call void @_ZdlPv(i8* %61) #2
+  br label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+
+_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, %if.then.i.i.i.i.i
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !40
+  call void @_ZNSt6localeD1Ev(%"class.std::locale"* nonnull %_M_buf_locale.i.i.i) #2
+  %62 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 0
+  call void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"* %62) #2
+  call void @llvm.lifetime.end(i64 376, i8* nonnull %26) #2  ; %ss is dead from here
+  br label %if.end43
+
+if.end43:                                         ; preds = %for.cond.cleanup, %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z14tensorConvNodePvmS_m(i8* %t1, i64 %bytes1, i8* %t2, i64 %bytes2) #3 {
+entry:                                            ; Node function: convolves %t1 with %t2 through the __visc__ intrinsics. %bytes1 and %bytes2 are not read in this body.
+  tail call void @__visc__hint(i32 4) #2  ; hint value 4 (same constant as tensorAddNode; root uses 1)
+  tail call void (i32, ...) @__visc__attributes(i32 2, i8* %t1, i8* %t2, i32 0) #2  ; presumably "2 pointers listed, then 0 more"; confirm against the intrinsic's definition
+  %conv.out = tail call i8* @__visc__tensor_convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) #2  ; the four i32 constants are presumably padding (2, 2) and stride (1, 1); TODO confirm
+  tail call void (i32, ...) @__visc__return(i32 2, i8* %conv.out, i64 0) #2  ; hands back the result pointer together with an i64 0
+  ret void
+}
+
+declare void @__visc__hint(i32) local_unnamed_addr #0  ; no body in this module; called first in each node function here (4 in the two tensor nodes, 1 in root)
+
+declare void @__visc__attributes(i32, ...) local_unnamed_addr #0  ; variadic, no body in this module; callers pass an i32, a list of i8* arguments, then i32 0
+
+declare i8* @__visc__tensor_convolution(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0  ; no body in this module; two i8* operands plus four i32 parameters, returns an i8*
+
+declare void @__visc__return(i32, ...) local_unnamed_addr #0  ; variadic, no body in this module; callers pass i32 2, an i8* result and i64 0
+
+; Function Attrs: nounwind uwtable
+define void @_Z13tensorAddNodePvmS_m(i8* %t1, i64 %bytest1, i8* %t2, i64 %bytest2) #3 {
+entry:                                            ; Node function: combines %t1 and %t2 with __visc__tensor_add. %bytest1 and %bytest2 are not read in this body.
+  tail call void @__visc__hint(i32 4) #2  ; hint value 4 (same constant as tensorConvNode; root uses 1)
+  tail call void (i32, ...) @__visc__attributes(i32 2, i8* %t1, i8* %t2, i32 0) #2  ; presumably "2 pointers listed, then 0 more"; confirm against the intrinsic's definition
+  %sum.out = tail call i8* @__visc__tensor_add(i8* %t1, i8* %t2) #2
+  tail call void (i32, ...) @__visc__return(i32 2, i8* %sum.out, i64 0) #2  ; hands back the result pointer together with an i64 0
+  ret void
+}
+
+declare i8* @__visc__tensor_add(i8*, i8*) local_unnamed_addr #0  ; no body in this module; takes two i8* operands and returns an i8* (called from @_Z13tensorAddNodePvmS_m)
+
+; Function Attrs: nounwind uwtable
+define void @_Z4rootPvmS_mS_mS_mS_m(i8* %x, i64 %x_bytes, i8* %conv1_w, i64 %conv1_w_bytes, i8* %conv1_b, i64 %conv1_b_bytes, i8* %conv2_w, i64 %conv2_w_bytes, i8* %conv2_b, i64 %conv2_b_bytes) #3 {
+entry:                                            ; Graph description: creates a convolution node and an add node, wires inputs, two edges and two outputs through __visc__ calls. The order of the calls is kept exactly as generated.
+  tail call void @__visc__hint(i32 1) #2  ; hint value 1 here; the two tensor node functions use 4
+  tail call void (i32, ...) @__visc__attributes(i32 5, i8* %x, i8* %conv1_w, i8* %conv1_b, i8* %conv2_w, i8* %conv2_b, i32 0) #2  ; all five pointer arguments are listed, followed by i32 0
+  %conv.node = tail call i8* (i32, ...) @__visc__createNodeND(i32 0, void (i8*, i64, i8*, i64)* nonnull @_Z14tensorConvNodePvmS_m) #2  ; node backed by tensorConvNode
+  %add.node = tail call i8* (i32, ...) @__visc__createNodeND(i32 0, void (i8*, i64, i8*, i64)* nonnull @_Z13tensorAddNodePvmS_m) #2  ; node backed by tensorAddNode
+  tail call void @__visc__bindIn(i8* %conv.node, i32 0, i32 0, i32 0) #2  ; presumably (node, root argument index, node input index, flag); indices 0..3 would then be %x, %x_bytes, %conv1_w, %conv1_w_bytes; TODO confirm
+  tail call void @__visc__bindIn(i8* %conv.node, i32 1, i32 1, i32 0) #2
+  tail call void @__visc__bindIn(i8* %conv.node, i32 2, i32 2, i32 0) #2
+  tail call void @__visc__bindIn(i8* %conv.node, i32 3, i32 3, i32 0) #2
+  %edge.ptr = tail call i8* @__visc__edge(i8* %conv.node, i8* %add.node, i32 1, i32 0, i32 0, i32 0) #2  ; conv node -> add node, index pair (0, 0); the returned value is unused
+  %edge.size = tail call i8* @__visc__edge(i8* %conv.node, i8* %add.node, i32 1, i32 1, i32 1, i32 0) #2  ; conv node -> add node, index pair (1, 1); the returned value is unused
+  tail call void @__visc__bindIn(i8* %add.node, i32 4, i32 2, i32 0) #2  ; indices 4 and 5 go to add-node positions 2 and 3 (presumably %conv1_b and %conv1_b_bytes)
+  tail call void @__visc__bindIn(i8* %add.node, i32 5, i32 3, i32 0) #2  ; NOTE(review): no bindIn uses indices 6..9, so the conv2_* arguments appear only in the attributes call above
+  tail call void @__visc__bindOut(i8* %add.node, i32 0, i32 0, i32 0) #2  ; index pairs (0, 0) and (1, 1) of the add node are bound as outputs
+  tail call void @__visc__bindOut(i8* %add.node, i32 1, i32 1, i32 0) #2
+  ret void
+}
+
+declare i8* @__visc__createNodeND(i32, ...) local_unnamed_addr #0  ; variadic, no body in this module; root passes i32 0 and a node function pointer, and uses the returned i8* as the node handle
+
+declare void @__visc__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0  ; no body in this module; root passes a node handle, two indices and a trailing 0
+
+declare i8* @__visc__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0  ; no body in this module; root passes two node handles and four i32 values, and ignores the returned i8*
+
+declare void @__visc__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0  ; no body in this module; root passes a node handle, two indices and a trailing 0
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main() local_unnamed_addr #6 {
+entry:
+  %__dnew.i.i.i.i207 = alloca i64, align 8
+  %__dnew.i.i.i.i166 = alloca i64, align 8
+  %__dnew.i.i.i.i125 = alloca i64, align 8
+  %__dnew.i.i.i.i84 = alloca i64, align 8
+  %__dnew.i.i.i.i55 = alloca i64, align 8
+  %__dnew.i.i.i.i = alloca i64, align 8
+  %prefix = alloca %"class.std::__cxx11::basic_string", align 8
+  %input_data_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp1 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv1_w_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp3 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv1_b_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp5 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv2_w_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp7 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv2_b_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp9 = alloca %"class.std::__cxx11::basic_string", align 8
+  %0 = bitcast %"class.std::__cxx11::basic_string"* %prefix to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %0) #2
+  %1 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 2
+  %2 = bitcast %"class.std::__cxx11::basic_string"* %prefix to %union.anon**
+  store %union.anon* %1, %union.anon** %2, align 8, !tbaa !52
+  %3 = bitcast %union.anon* %1 to i8*
+  %4 = bitcast i64* %__dnew.i.i.i.i to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %4) #2
+  store i64 54, i64* %__dnew.i.i.i.i, align 8, !tbaa !12
+  %call5.i.i.i.i = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %prefix, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i, i64 0) #2
+  %_M_p.i13.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56
+  %5 = load i64, i64* %__dnew.i.i.i.i, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 2, i32 0
+  store i64 %5, i64* %_M_allocated_capacity.i.i.i.i.i, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i, i8* nonnull getelementptr inbounds ([55 x i8], [55 x i8]* @.str.23, i64 0, i64 0), i64 54, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 1
+  store i64 %5, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i8, i8* %call5.i.i.i.i, i64 %5
+  store i8 0, i8* %arrayidx.i.i.i.i.i, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %4) #2
+  %6 = bitcast %"class.std::__cxx11::basic_string"* %input_data_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %6) #2
+  %7 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp1 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %7) #2
+  %8 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 2
+  %9 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp1 to %union.anon**
+  store %union.anon* %8, %union.anon** %9, align 8, !tbaa !52
+  %10 = bitcast %union.anon* %8 to i8*
+  %11 = bitcast i64* %__dnew.i.i.i.i55 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %11) #2
+  store i64 34, i64* %__dnew.i.i.i.i55, align 8, !tbaa !12
+  %call5.i.i.i.i60 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp1, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i55, i64 0) #2
+  %_M_p.i13.i.i.i.i61 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i60, i8** %_M_p.i13.i.i.i.i61, align 8, !tbaa !56
+  %12 = load i64, i64* %__dnew.i.i.i.i55, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i62 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 2, i32 0
+  store i64 %12, i64* %_M_allocated_capacity.i.i.i.i.i62, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i60, i8* nonnull getelementptr inbounds ([35 x i8], [35 x i8]* @.str.24, i64 0, i64 0), i64 34, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i68 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 1
+  store i64 %12, i64* %_M_string_length.i.i.i.i.i.i68, align 8, !tbaa !53
+  %arrayidx.i.i.i.i.i69 = getelementptr inbounds i8, i8* %call5.i.i.i.i60, i64 %12
+  store i8 0, i8* %arrayidx.i.i.i.i.i69, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %11) #2
+  %13 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53, !noalias !62
+  %14 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56, !noalias !62
+  %call3.i.i.i = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp1, i64 0, i64 0, i8* %14, i64 %13) #2, !noalias !62
+  %15 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 2
+  %16 = bitcast %"class.std::__cxx11::basic_string"* %input_data_path to %union.anon**
+  store %union.anon* %15, %union.anon** %16, align 8, !tbaa !52, !alias.scope !62
+  %_M_p.i.i23.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 0, i32 0
+  %17 = load i8*, i8** %_M_p.i.i23.i.i, align 8, !tbaa !56
+  %18 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 2
+  %arraydecay.i.i.i.i71 = bitcast %union.anon* %18 to i8*
+  %cmp.i.i.i72 = icmp eq i8* %17, %arraydecay.i.i.i.i71
+  br i1 %cmp.i.i.i72, label %if.then.i.i73, label %if.else.i.i
+
+if.then.i.i73:                                    ; preds = %entry
+  %arraydecay.i.i.i = bitcast %union.anon* %15 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i, i8* %17, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+
+if.else.i.i:                                      ; preds = %entry
+  %_M_p.i21.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 0, i32 0
+  store i8* %17, i8** %_M_p.i21.i.i, align 8, !tbaa !56, !alias.scope !62
+  %_M_allocated_capacity.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 2, i32 0
+  %19 = load i64, i64* %_M_allocated_capacity.i.i, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 2, i32 0
+  store i64 %19, i64* %_M_allocated_capacity.i.i.i, align 8, !tbaa !12, !alias.scope !62
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit: ; preds = %if.then.i.i73, %if.else.i.i
+  %_M_string_length.i20.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 1
+  %20 = load i64, i64* %_M_string_length.i20.i.i, align 8, !tbaa !53
+  %_M_string_length.i.i2.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 1
+  store i64 %20, i64* %_M_string_length.i.i2.i, align 8, !tbaa !53, !alias.scope !62
+  %21 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i to %union.anon**
+  store %union.anon* %18, %union.anon** %21, align 8, !tbaa !56
+  store i64 0, i64* %_M_string_length.i20.i.i, align 8, !tbaa !53
+  store i8 0, i8* %arraydecay.i.i.i.i71, align 1, !tbaa !36
+  %22 = load i8*, i8** %_M_p.i13.i.i.i.i61, align 8, !tbaa !56
+  %cmp.i.i.i76 = icmp eq i8* %22, %10
+  br i1 %cmp.i.i.i76, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78, label %if.then.i.i77
+
+if.then.i.i77:                                    ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+  call void @_ZdlPv(i8* %22) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78: ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit, %if.then.i.i77
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %7) #2
+  %23 = bitcast %"class.std::__cxx11::basic_string"* %conv1_w_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %23) #2
+  %24 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp3 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %24) #2
+  %25 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 2
+  %26 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp3 to %union.anon**
+  store %union.anon* %25, %union.anon** %26, align 8, !tbaa !52
+  %27 = bitcast %union.anon* %25 to i8*
+  %28 = bitcast i64* %__dnew.i.i.i.i84 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %28) #2
+  store i64 22, i64* %__dnew.i.i.i.i84, align 8, !tbaa !12
+  %call5.i.i.i.i89 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp3, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i84, i64 0) #2
+  %_M_p.i13.i.i.i.i90 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i89, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !56
+  %29 = load i64, i64* %__dnew.i.i.i.i84, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i91 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 2, i32 0
+  store i64 %29, i64* %_M_allocated_capacity.i.i.i.i.i91, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i89, i8* nonnull getelementptr inbounds ([23 x i8], [23 x i8]* @.str.25, i64 0, i64 0), i64 22, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i97 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 1
+  store i64 %29, i64* %_M_string_length.i.i.i.i.i.i97, align 8, !tbaa !53
+  %30 = load i8*, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i98 = getelementptr inbounds i8, i8* %30, i64 %29
+  store i8 0, i8* %arrayidx.i.i.i.i.i98, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %28) #2
+  %31 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53, !noalias !65
+  %32 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56, !noalias !65
+  %call3.i.i.i102 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp3, i64 0, i64 0, i8* %32, i64 %31) #2, !noalias !65
+  %33 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 2
+  %34 = bitcast %"class.std::__cxx11::basic_string"* %conv1_w_path to %union.anon**
+  store %union.anon* %33, %union.anon** %34, align 8, !tbaa !52, !alias.scope !65
+  %_M_p.i.i23.i.i103 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 0, i32 0
+  %35 = load i8*, i8** %_M_p.i.i23.i.i103, align 8, !tbaa !56
+  %36 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 2
+  %arraydecay.i.i.i.i104 = bitcast %union.anon* %36 to i8*
+  %cmp.i.i.i105 = icmp eq i8* %35, %arraydecay.i.i.i.i104
+  br i1 %cmp.i.i.i105, label %if.then.i.i107, label %if.else.i.i111
+
+if.then.i.i107:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+  %arraydecay.i.i.i106 = bitcast %union.anon* %33 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i106, i8* %35, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+
+if.else.i.i111:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+  %_M_p.i21.i.i108 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 0, i32 0
+  store i8* %35, i8** %_M_p.i21.i.i108, align 8, !tbaa !56, !alias.scope !65
+  %_M_allocated_capacity.i.i109 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 2, i32 0
+  %37 = load i64, i64* %_M_allocated_capacity.i.i109, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i110 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 2, i32 0
+  store i64 %37, i64* %_M_allocated_capacity.i.i.i110, align 8, !tbaa !12, !alias.scope !65
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114: ; preds = %if.then.i.i107, %if.else.i.i111
+  %_M_string_length.i20.i.i112 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 1
+  %38 = load i64, i64* %_M_string_length.i20.i.i112, align 8, !tbaa !53
+  %_M_string_length.i.i2.i113 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 1
+  store i64 %38, i64* %_M_string_length.i.i2.i113, align 8, !tbaa !53, !alias.scope !65
+  %39 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i102 to %union.anon**
+  store %union.anon* %36, %union.anon** %39, align 8, !tbaa !56
+  store i64 0, i64* %_M_string_length.i20.i.i112, align 8, !tbaa !53
+  store i8 0, i8* %arraydecay.i.i.i.i104, align 1, !tbaa !36
+  %40 = load i8*, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !56
+  %cmp.i.i.i117 = icmp eq i8* %40, %27
+  br i1 %cmp.i.i.i117, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119, label %if.then.i.i118
+
+if.then.i.i118:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+  call void @_ZdlPv(i8* %40) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119: ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114, %if.then.i.i118
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %24) #2
+  %41 = bitcast %"class.std::__cxx11::basic_string"* %conv1_b_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %41) #2
+  %42 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp5 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %42) #2
+  %43 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 2
+  %44 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp5 to %union.anon**
+  store %union.anon* %43, %union.anon** %44, align 8, !tbaa !52
+  %45 = bitcast %union.anon* %43 to i8*
+  %46 = bitcast i64* %__dnew.i.i.i.i125 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %46) #2
+  store i64 27, i64* %__dnew.i.i.i.i125, align 8, !tbaa !12
+  %call5.i.i.i.i130 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp5, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i125, i64 0) #2
+  %_M_p.i13.i.i.i.i131 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i130, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !56
+  %47 = load i64, i64* %__dnew.i.i.i.i125, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i132 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 2, i32 0
+  store i64 %47, i64* %_M_allocated_capacity.i.i.i.i.i132, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i130, i8* nonnull getelementptr inbounds ([28 x i8], [28 x i8]* @.str.26, i64 0, i64 0), i64 27, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i138 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 1
+  store i64 %47, i64* %_M_string_length.i.i.i.i.i.i138, align 8, !tbaa !53
+  %48 = load i8*, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i139 = getelementptr inbounds i8, i8* %48, i64 %47
+  store i8 0, i8* %arrayidx.i.i.i.i.i139, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %46) #2
+  %49 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53, !noalias !68
+  %50 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56, !noalias !68
+  %call3.i.i.i143 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp5, i64 0, i64 0, i8* %50, i64 %49) #2, !noalias !68
+  %51 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 2
+  %52 = bitcast %"class.std::__cxx11::basic_string"* %conv1_b_path to %union.anon**
+  store %union.anon* %51, %union.anon** %52, align 8, !tbaa !52, !alias.scope !68
+  %_M_p.i.i23.i.i144 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 0, i32 0
+  %53 = load i8*, i8** %_M_p.i.i23.i.i144, align 8, !tbaa !56
+  %54 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 2
+  %arraydecay.i.i.i.i145 = bitcast %union.anon* %54 to i8*
+  %cmp.i.i.i146 = icmp eq i8* %53, %arraydecay.i.i.i.i145
+  br i1 %cmp.i.i.i146, label %if.then.i.i148, label %if.else.i.i152
+
+if.then.i.i148:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+  %arraydecay.i.i.i147 = bitcast %union.anon* %51 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i147, i8* %53, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+
+if.else.i.i152:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+  %_M_p.i21.i.i149 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 0, i32 0
+  store i8* %53, i8** %_M_p.i21.i.i149, align 8, !tbaa !56, !alias.scope !68
+  %_M_allocated_capacity.i.i150 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 2, i32 0
+  %55 = load i64, i64* %_M_allocated_capacity.i.i150, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i151 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 2, i32 0
+  store i64 %55, i64* %_M_allocated_capacity.i.i.i151, align 8, !tbaa !12, !alias.scope !68
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155: ; preds = %if.then.i.i148, %if.else.i.i152
+  %_M_string_length.i20.i.i153 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 1
+  %56 = load i64, i64* %_M_string_length.i20.i.i153, align 8, !tbaa !53
+  %_M_string_length.i.i2.i154 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 1
+  store i64 %56, i64* %_M_string_length.i.i2.i154, align 8, !tbaa !53, !alias.scope !68
+  %57 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i143 to %union.anon**
+  store %union.anon* %54, %union.anon** %57, align 8, !tbaa !56
+  store i64 0, i64* %_M_string_length.i20.i.i153, align 8, !tbaa !53
+  store i8 0, i8* %arraydecay.i.i.i.i145, align 1, !tbaa !36
+  %58 = load i8*, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !56
+  %cmp.i.i.i158 = icmp eq i8* %58, %45
+  br i1 %cmp.i.i.i158, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160, label %if.then.i.i159
+
+if.then.i.i159:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+  call void @_ZdlPv(i8* %58) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160: ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155, %if.then.i.i159
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %42) #2
+  %59 = bitcast %"class.std::__cxx11::basic_string"* %conv2_w_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %59) #2
+  %60 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp7 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %60) #2
+  %61 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 2
+  %62 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp7 to %union.anon**
+  store %union.anon* %61, %union.anon** %62, align 8, !tbaa !52
+  %63 = bitcast %union.anon* %61 to i8*
+  %64 = bitcast i64* %__dnew.i.i.i.i166 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %64) #2
+  store i64 22, i64* %__dnew.i.i.i.i166, align 8, !tbaa !12
+  %call5.i.i.i.i171 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp7, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i166, i64 0) #2
+  %_M_p.i13.i.i.i.i172 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i171, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !56
+  %65 = load i64, i64* %__dnew.i.i.i.i166, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i173 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 2, i32 0
+  store i64 %65, i64* %_M_allocated_capacity.i.i.i.i.i173, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i171, i8* nonnull getelementptr inbounds ([23 x i8], [23 x i8]* @.str.27, i64 0, i64 0), i64 22, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i179 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 1
+  store i64 %65, i64* %_M_string_length.i.i.i.i.i.i179, align 8, !tbaa !53
+  %66 = load i8*, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i180 = getelementptr inbounds i8, i8* %66, i64 %65
+  store i8 0, i8* %arrayidx.i.i.i.i.i180, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %64) #2
+  %67 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53, !noalias !71
+  %68 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56, !noalias !71
+  %call3.i.i.i184 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp7, i64 0, i64 0, i8* %68, i64 %67) #2, !noalias !71
+  %69 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 2
+  %70 = bitcast %"class.std::__cxx11::basic_string"* %conv2_w_path to %union.anon**
+  store %union.anon* %69, %union.anon** %70, align 8, !tbaa !52, !alias.scope !71
+  %_M_p.i.i23.i.i185 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 0, i32 0
+  %71 = load i8*, i8** %_M_p.i.i23.i.i185, align 8, !tbaa !56
+  %72 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 2
+  %arraydecay.i.i.i.i186 = bitcast %union.anon* %72 to i8*
+  %cmp.i.i.i187 = icmp eq i8* %71, %arraydecay.i.i.i.i186
+  br i1 %cmp.i.i.i187, label %if.then.i.i189, label %if.else.i.i193
+
+if.then.i.i189:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+  %arraydecay.i.i.i188 = bitcast %union.anon* %69 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i188, i8* %71, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+
+if.else.i.i193:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+  %_M_p.i21.i.i190 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 0, i32 0
+  store i8* %71, i8** %_M_p.i21.i.i190, align 8, !tbaa !56, !alias.scope !71
+  %_M_allocated_capacity.i.i191 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 2, i32 0
+  %73 = load i64, i64* %_M_allocated_capacity.i.i191, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i192 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 2, i32 0
+  store i64 %73, i64* %_M_allocated_capacity.i.i.i192, align 8, !tbaa !12, !alias.scope !71
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196: ; preds = %if.then.i.i189, %if.else.i.i193
+  %_M_string_length.i20.i.i194 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 1
+  %74 = load i64, i64* %_M_string_length.i20.i.i194, align 8, !tbaa !53
+  %_M_string_length.i.i2.i195 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 1
+  store i64 %74, i64* %_M_string_length.i.i2.i195, align 8, !tbaa !53, !alias.scope !71
+  %75 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i184 to %union.anon**
+  store %union.anon* %72, %union.anon** %75, align 8, !tbaa !56
+  store i64 0, i64* %_M_string_length.i20.i.i194, align 8, !tbaa !53
+  store i8 0, i8* %arraydecay.i.i.i.i186, align 1, !tbaa !36
+  %76 = load i8*, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !56
+  %cmp.i.i.i199 = icmp eq i8* %76, %63
+  br i1 %cmp.i.i.i199, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201, label %if.then.i.i200
+
+if.then.i.i200:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+  call void @_ZdlPv(i8* %76) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201: ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196, %if.then.i.i200
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %60) #2
+  %77 = bitcast %"class.std::__cxx11::basic_string"* %conv2_b_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %77) #2
+  %78 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp9 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %78) #2
+  %79 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 2
+  %80 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp9 to %union.anon**
+  store %union.anon* %79, %union.anon** %80, align 8, !tbaa !52
+  %81 = bitcast %union.anon* %79 to i8*
+  %82 = bitcast i64* %__dnew.i.i.i.i207 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %82) #2
+  store i64 27, i64* %__dnew.i.i.i.i207, align 8, !tbaa !12
+  %call5.i.i.i.i212 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp9, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i207, i64 0) #2
+  %_M_p.i13.i.i.i.i213 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i212, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !56
+  %83 = load i64, i64* %__dnew.i.i.i.i207, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i.i.i214 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 2, i32 0
+  store i64 %83, i64* %_M_allocated_capacity.i.i.i.i.i214, align 8, !tbaa !12
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i212, i8* nonnull getelementptr inbounds ([28 x i8], [28 x i8]* @.str.28, i64 0, i64 0), i64 27, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i220 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 1
+  store i64 %83, i64* %_M_string_length.i.i.i.i.i.i220, align 8, !tbaa !53
+  %84 = load i8*, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i221 = getelementptr inbounds i8, i8* %84, i64 %83
+  store i8 0, i8* %arrayidx.i.i.i.i.i221, align 1, !tbaa !36
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %82) #2
+  %85 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !53, !noalias !74
+  %86 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56, !noalias !74
+  %call3.i.i.i225 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp9, i64 0, i64 0, i8* %86, i64 %85) #2, !noalias !74
+  %87 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 2
+  %88 = bitcast %"class.std::__cxx11::basic_string"* %conv2_b_path to %union.anon**
+  store %union.anon* %87, %union.anon** %88, align 8, !tbaa !52, !alias.scope !74
+  %_M_p.i.i23.i.i226 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 0, i32 0
+  %89 = load i8*, i8** %_M_p.i.i23.i.i226, align 8, !tbaa !56
+  %90 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 2
+  %arraydecay.i.i.i.i227 = bitcast %union.anon* %90 to i8*
+  %cmp.i.i.i228 = icmp eq i8* %89, %arraydecay.i.i.i.i227
+  br i1 %cmp.i.i.i228, label %if.then.i.i230, label %if.else.i.i234
+
+if.then.i.i230:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+  %arraydecay.i.i.i229 = bitcast %union.anon* %87 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i229, i8* %89, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+
+if.else.i.i234:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+  %_M_p.i21.i.i231 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 0, i32 0
+  store i8* %89, i8** %_M_p.i21.i.i231, align 8, !tbaa !56, !alias.scope !74
+  %_M_allocated_capacity.i.i232 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 2, i32 0
+  %91 = load i64, i64* %_M_allocated_capacity.i.i232, align 8, !tbaa !12
+  %_M_allocated_capacity.i.i.i233 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 2, i32 0
+  store i64 %91, i64* %_M_allocated_capacity.i.i.i233, align 8, !tbaa !12, !alias.scope !74
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237: ; preds = %if.then.i.i230, %if.else.i.i234
+  %_M_string_length.i20.i.i235 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 1
+  %92 = load i64, i64* %_M_string_length.i20.i.i235, align 8, !tbaa !53
+  %_M_string_length.i.i2.i236 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 1
+  store i64 %92, i64* %_M_string_length.i.i2.i236, align 8, !tbaa !53, !alias.scope !74
+  %93 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i225 to %union.anon**
+  store %union.anon* %90, %union.anon** %93, align 8, !tbaa !56
+  store i64 0, i64* %_M_string_length.i20.i.i235, align 8, !tbaa !53
+  store i8 0, i8* %arraydecay.i.i.i.i227, align 1, !tbaa !36
+  %94 = load i8*, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !56
+  %cmp.i.i.i240 = icmp eq i8* %94, %81
+  br i1 %cmp.i.i.i240, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242, label %if.then.i.i241
+
+if.then.i.i241:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+  call void @_ZdlPv(i8* %94) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242: ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237, %if.then.i.i241
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %78) #2
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 0, i32 0
+  %95 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.29, i64 0, i64 0), i8* %95)
+  %96 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %call13 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %96, i32 0, i32 1000, i32 1, i32 28, i32 28)
+  %_M_p.i.i245 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 0, i32 0
+  %97 = load i8*, i8** %_M_p.i.i245, align 8, !tbaa !56
+  %call15 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %97, i32 0, i32 32, i32 1, i32 5, i32 5)
+  %_M_p.i.i247 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 0, i32 0
+  %98 = load i8*, i8** %_M_p.i.i247, align 8, !tbaa !56
+  %call17 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %98, i32 0, i32 1, i32 32, i32 1, i32 1)
+  %_M_p.i.i246 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 0, i32 0
+  %99 = load i8*, i8** %_M_p.i.i246, align 8, !tbaa !56
+  %call19 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %99, i32 0, i32 64, i32 32, i32 5, i32 5)
+  %_M_p.i.i244 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 0, i32 0
+  %100 = load i8*, i8** %_M_p.i.i244, align 8, !tbaa !56
+  %call21 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %100, i32 0, i32 1, i32 64, i32 1, i32 1)
+  call void @__visc__init() #2
+  %call22 = call noalias i8* @malloc(i64 96) #2
+  %x23 = bitcast i8* %call22 to i8**
+  %101 = bitcast i8* %call22 to %struct.Tensor**
+  store %struct.Tensor* %call13, %struct.Tensor** %101, align 1, !tbaa !77
+  %x_bytes = getelementptr inbounds i8, i8* %call22, i64 8
+  %102 = bitcast i8* %x_bytes to i64*
+  store i64 0, i64* %102, align 1, !tbaa !80
+  %conv1_w24 = getelementptr inbounds i8, i8* %call22, i64 16
+  %103 = bitcast i8* %conv1_w24 to %struct.Tensor**
+  store %struct.Tensor* %call15, %struct.Tensor** %103, align 1, !tbaa !81
+  %conv1_w_bytes = getelementptr inbounds i8, i8* %call22, i64 24
+  %104 = bitcast i8* %conv1_w_bytes to i64*
+  store i64 0, i64* %104, align 1, !tbaa !82
+  %conv1_b25 = getelementptr inbounds i8, i8* %call22, i64 32
+  %105 = bitcast i8* %conv1_b25 to %struct.Tensor**
+  store %struct.Tensor* %call17, %struct.Tensor** %105, align 1, !tbaa !83
+  %conv1_b_bytes = getelementptr inbounds i8, i8* %call22, i64 40
+  %106 = bitcast i8* %conv1_b_bytes to i64*
+  store i64 0, i64* %106, align 1, !tbaa !84
+  %conv2_w26 = getelementptr inbounds i8, i8* %call22, i64 48
+  %107 = bitcast i8* %conv2_w26 to %struct.Tensor**
+  store %struct.Tensor* %call19, %struct.Tensor** %107, align 1, !tbaa !85
+  %conv2_w_bytes = getelementptr inbounds i8, i8* %call22, i64 56
+  %108 = bitcast i8* %conv2_w_bytes to i64*
+  store i64 0, i64* %108, align 1, !tbaa !86
+  %conv2_b27 = getelementptr inbounds i8, i8* %call22, i64 64
+  %109 = bitcast i8* %conv2_b27 to %struct.Tensor**
+  store %struct.Tensor* %call21, %struct.Tensor** %109, align 1, !tbaa !87
+  %conv2_b_bytes = getelementptr inbounds i8, i8* %call22, i64 72
+  %110 = bitcast i8* %conv2_b_bytes to i64*
+  store i64 0, i64* %110, align 1, !tbaa !88
+  %call28 = call i8* (i32, ...) @__visc__launch(i32 0, void (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* nonnull @_Z4rootPvmS_mS_mS_mS_m, i8* %call22) #2
+  call void @__visc__wait(i8* %call28) #2
+  %111 = load i8*, i8** %x23, align 1, !tbaa !77
+  call void @hpvm_request_tensor(i8* %111, i32 0) #2
+  call void @__visc__cleanup() #2
+  %112 = load i8*, i8** %_M_p.i.i244, align 8, !tbaa !56
+  %arraydecay.i.i.i.i203 = bitcast %union.anon* %87 to i8*
+  %cmp.i.i.i204 = icmp eq i8* %112, %arraydecay.i.i.i.i203
+  br i1 %cmp.i.i.i204, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206, label %if.then.i.i205
+
+if.then.i.i205:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242
+  call void @_ZdlPv(i8* %112) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242, %if.then.i.i205
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %77) #2
+  %113 = load i8*, i8** %_M_p.i.i246, align 8, !tbaa !56
+  %arraydecay.i.i.i.i162 = bitcast %union.anon* %69 to i8*
+  %cmp.i.i.i163 = icmp eq i8* %113, %arraydecay.i.i.i.i162
+  br i1 %cmp.i.i.i163, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165, label %if.then.i.i164
+
+if.then.i.i164:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206
+  call void @_ZdlPv(i8* %113) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206, %if.then.i.i164
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %59) #2
+  %114 = load i8*, i8** %_M_p.i.i247, align 8, !tbaa !56
+  %arraydecay.i.i.i.i121 = bitcast %union.anon* %51 to i8*
+  %cmp.i.i.i122 = icmp eq i8* %114, %arraydecay.i.i.i.i121
+  br i1 %cmp.i.i.i122, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124, label %if.then.i.i123
+
+if.then.i.i123:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165
+  call void @_ZdlPv(i8* %114) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165, %if.then.i.i123
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %41) #2
+  %115 = load i8*, i8** %_M_p.i.i245, align 8, !tbaa !56
+  %arraydecay.i.i.i.i80 = bitcast %union.anon* %33 to i8*
+  %cmp.i.i.i81 = icmp eq i8* %115, %arraydecay.i.i.i.i80
+  br i1 %cmp.i.i.i81, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83, label %if.then.i.i82
+
+if.then.i.i82:                                    ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124
+  call void @_ZdlPv(i8* %115) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124, %if.then.i.i82
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %23) #2
+  %116 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !56
+  %arraydecay.i.i.i.i51 = bitcast %union.anon* %15 to i8*
+  %cmp.i.i.i52 = icmp eq i8* %116, %arraydecay.i.i.i.i51
+  br i1 %cmp.i.i.i52, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54, label %if.then.i.i53
+
+if.then.i.i53:                                    ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83
+  call void @_ZdlPv(i8* %116) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83, %if.then.i.i53
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %6) #2
+  %117 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !56
+  %cmp.i.i.i = icmp eq i8* %117, %3
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54
+  call void @_ZdlPv(i8* %117) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54, %if.then.i.i
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %0) #2
+  ret i32 0
+}
+
+declare void @__visc__init() local_unnamed_addr #0
+
+declare i8* @__visc__launch(i32, ...) local_unnamed_addr #0
+
+declare void @__visc__wait(i8*) local_unnamed_addr #0
+
+declare void @__visc__cleanup() local_unnamed_addr #0
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdlPv(i8*) local_unnamed_addr #7
+
+declare void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"*, %"class.std::basic_streambuf"*) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt6localeC1Ev(%"class.std::locale"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt6localeD1Ev(%"class.std::locale"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"*) unnamed_addr #1
+
+declare dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"*, double) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+declare void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* noalias sret, %"class.std::__cxx11::basic_stringbuf"*) local_unnamed_addr #3 align 2
+
+declare i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"*, i64* dereferenceable(8), i64) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #4
+
+declare dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"*, i64, i64, i8*, i64) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define internal void @_GLOBAL__sub_I_lenet.cpp() #3 section ".text.startup" { ; Startup routine emitted for lenet.cpp (placed in .text.startup): initialises the iostream guard object and schedules its teardown.
+entry:
+  tail call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull @_ZStL8__ioinit) #2 ; std::ios_base::Init constructor, run on @_ZStL8__ioinit
+  %0 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i64 0, i32 0), i8* nonnull @__dso_handle) #2 ; register the matching destructor for @_ZStL8__ioinit with __cxa_atexit; the i32 result (%0) is never inspected
+  ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #2
+
+declare i32 @putchar(i32)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #4
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { argmemonly nounwind }
+attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #6 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #7 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #8 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.1 "}
+!1 = !{!2, !7, i64 40}
+!2 = !{!"_ZTS6Tensor", !3, i64 0, !3, i64 4, !6, i64 8, !7, i64 16, !7, i64 24, !7, i64 32, !7, i64 40, !8, i64 48, !8, i64 56, !9, i64 64}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!"_ZTS15data_location_t", !4, i64 0}
+!7 = !{!"any pointer", !4, i64 0}
+!8 = !{!"long", !4, i64 0}
+!9 = !{!"_ZTS9Dimension", !3, i64 0, !7, i64 8}
+!10 = !{!2, !3, i64 64}
+!11 = !{!2, !7, i64 72}
+!12 = !{!8, !8, i64 0}
+!13 = !{!2, !8, i64 48}
+!14 = !{!2, !8, i64 56}
+!15 = !{!2, !7, i64 32}
+!16 = !{!2, !3, i64 0}
+!17 = !{!18, !18, i64 0}
+!18 = !{!"float", !4, i64 0}
+!19 = distinct !{!19, !20}
+!20 = !{!"llvm.loop.unroll.disable"}
+!21 = distinct !{!21, !22, !23}
+!22 = !{!"llvm.loop.vectorize.width", i32 1}
+!23 = !{!"llvm.loop.interleave.count", i32 1}
+!24 = distinct !{!24, !22, !23}
+!25 = distinct !{!25, !20}
+!26 = distinct !{!26, !22, !23}
+!27 = distinct !{!27, !20}
+!28 = distinct !{!28, !22, !23}
+!29 = distinct !{!29, !22, !23}
+!30 = distinct !{!30, !22, !23}
+!31 = distinct !{!31, !20}
+!32 = distinct !{!32, !22, !23}
+!33 = distinct !{!33, !22, !23}
+!34 = distinct !{!34, !22, !23}
+!35 = distinct !{!35, !22, !23}
+!36 = !{!4, !4, i64 0}
+!37 = distinct !{!37, !22, !23}
+!38 = distinct !{!38, !39, !22, !23}
+!39 = !{!"llvm.loop.unroll.runtime.disable"}
+!40 = !{!41, !41, i64 0}
+!41 = !{!"vtable pointer", !5, i64 0}
+!42 = !{!43, !7, i64 216}
+!43 = !{!"_ZTSSt9basic_iosIcSt11char_traitsIcEE", !7, i64 216, !4, i64 224, !44, i64 225, !7, i64 232, !7, i64 240, !7, i64 248, !7, i64 256}
+!44 = !{!"bool", !4, i64 0}
+!45 = !{!43, !4, i64 224}
+!46 = !{!43, !44, i64 225}
+!47 = !{!48, !49, i64 64}
+!48 = !{!"_ZTSNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE", !49, i64 64, !50, i64 72}
+!49 = !{!"_ZTSSt13_Ios_Openmode", !4, i64 0}
+!50 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE", !51, i64 0, !8, i64 8, !4, i64 16}
+!51 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderE", !7, i64 0}
+!52 = !{!51, !7, i64 0}
+!53 = !{!50, !8, i64 8}
+!54 = !{!55, !55, i64 0}
+!55 = !{!"_ZTSSt13_Ios_Fmtflags", !4, i64 0}
+!56 = !{!50, !7, i64 0}
+!57 = !{!58, !55, i64 24}
+!58 = !{!"_ZTSSt8ios_base", !8, i64 8, !8, i64 16, !55, i64 24, !59, i64 28, !59, i64 32, !7, i64 40, !60, i64 48, !4, i64 64, !3, i64 192, !7, i64 200, !61, i64 208}
+!59 = !{!"_ZTSSt12_Ios_Iostate", !4, i64 0}
+!60 = !{!"_ZTSNSt8ios_base6_WordsE", !7, i64 0, !8, i64 8}
+!61 = !{!"_ZTSSt6locale", !7, i64 0}
+!62 = !{!63}
+!63 = distinct !{!63, !64, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!64 = distinct !{!64, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!65 = !{!66}
+!66 = distinct !{!66, !67, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!67 = distinct !{!67, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!68 = !{!69}
+!69 = distinct !{!69, !70, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!70 = distinct !{!70, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!71 = !{!72}
+!72 = distinct !{!72, !73, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!73 = distinct !{!73, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!74 = !{!75}
+!75 = distinct !{!75, !76, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!76 = distinct !{!76, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!77 = !{!78, !7, i64 0}
+!78 = !{!"_ZTS6RootIn", !7, i64 0, !8, i64 8, !7, i64 16, !8, i64 24, !7, i64 32, !8, i64 40, !7, i64 48, !8, i64 56, !7, i64 64, !8, i64 72, !79, i64 80}
+!79 = !{!"_ZTS5ret_t", !7, i64 0, !8, i64 8}
+!80 = !{!78, !8, i64 8}
+!81 = !{!78, !7, i64 16}
+!82 = !{!78, !8, i64 24}
+!83 = !{!78, !7, i64 32}
+!84 = !{!78, !8, i64 40}
+!85 = !{!78, !7, i64 48}
+!86 = !{!78, !8, i64 56}
+!87 = !{!78, !7, i64 64}
+!88 = !{!78, !8, i64 72}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.opt.bc b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.opt.bc
new file mode 100644
index 0000000000000000000000000000000000000000..f60bfde0831df8771efc86fa6a08b42e6a659a1b
Binary files /dev/null and b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.opt.bc differ
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.visc.ll b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.visc.ll
new file mode 100644
index 0000000000000000000000000000000000000000..05be5bbbc7aa9cd79ecd7af946850c107305d39a
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet.visc.ll
@@ -0,0 +1,2658 @@
+; ModuleID = 'build/lenet.ll'
+source_filename = "src/lenet.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"class.std::ios_base::Init" = type { i8 }
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%struct.Tensor = type { i32, i32, i32, %struct.cudnnTensorStruct*, %struct.cudnnFilterStruct*, i8*, i8*, i64, i64, %struct.Dimension }
+%struct.cudnnTensorStruct = type opaque
+%struct.cudnnFilterStruct = type opaque
+%struct.Dimension = type { i32, i64* }
+%"class.std::__cxx11::basic_ostringstream" = type { %"class.std::basic_ostream.base", %"class.std::__cxx11::basic_stringbuf", %"class.std::basic_ios" }
+%"class.std::basic_ostream.base" = type { i32 (...)** }
+%"class.std::__cxx11::basic_stringbuf" = type { %"class.std::basic_streambuf", i32, %"class.std::__cxx11::basic_string" }
+%"class.std::basic_streambuf" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale" }
+%"class.std::locale" = type { %"class.std::locale::_Impl"* }
+%"class.std::locale::_Impl" = type { i32, %"class.std::locale::facet"**, i64, %"class.std::locale::facet"**, i8** }
+%"class.std::locale::facet" = type <{ i32 (...)**, i32, [4 x i8] }>
+%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider", i64, %union.anon }
+%"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8, %"class.std::basic_streambuf"*, %"class.std::ctype"*, %"class.std::num_put"*, %"class.std::num_get"* }
+%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"class.std::locale" }
+%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 }
+%"struct.std::ios_base::_Words" = type { i8*, i64 }
+%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" }
+%"class.std::ctype" = type <{ %"class.std::locale::facet.base", [4 x i8], %struct.__locale_struct*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
+%"class.std::locale::facet.base" = type <{ i32 (...)**, i32 }>
+%struct.__locale_struct = type { [13 x %struct.__locale_data*], i16*, i32*, i32*, [13 x i8*] }
+%struct.__locale_data = type opaque
+%"class.std::num_put" = type { %"class.std::locale::facet.base", [4 x i8] }
+%"class.std::num_get" = type { %"class.std::locale::facet.base", [4 x i8] }
+%struct.out._Z14tensorConvNodePvmS_m = type <{ i8*, i64 }>
+%struct.out._Z13tensorAddNodePvmS_m = type <{ i8*, i64 }>
+%struct.out._Z4rootPvmS_mS_mS_mS_m = type <{ i8*, i64 }>
+
+@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
+@__dso_handle = external global i8
+@.str.1 = private unnamed_addr constant [19 x i8] c"tensor dims = %d \0A\00", align 1
+@.str.2 = private unnamed_addr constant [18 x i8] c"dim1_size = %zu \0A\00", align 1
+@.str.3 = private unnamed_addr constant [18 x i8] c"dim2_size = %zu \0A\00", align 1
+@.str.4 = private unnamed_addr constant [18 x i8] c"num_elems = %zu \0A\00", align 1
+@.str.5 = private unnamed_addr constant [3 x i8] c"wb\00", align 1
+@.str.6 = private unnamed_addr constant [58 x i8] c"File %s could not be created. Check if directory exists \0A\00", align 1
+@.str.7 = private unnamed_addr constant [22 x i8] c"size_in_bytes = %zu \0A\00", align 1
+@.str.8 = private unnamed_addr constant [21 x i8] c"bytes_written = %zu\0A\00", align 1
+@.str.9 = private unnamed_addr constant [4 x i8] c"%f,\00", align 1
+@.str.11 = private unnamed_addr constant [18 x i8] c"Num_elems = %zu \0A\00", align 1
+@.str.12 = private unnamed_addr constant [16 x i8] c"dim[%d] = %zu \0A\00", align 1
+@.str.13 = private unnamed_addr constant [35 x i8] c"Tensor data mismatch at index %d \0A\00", align 1
+@.str.14 = private unnamed_addr constant [21 x i8] c"Tensor data mismatch\00", align 1
+@.str.15 = private unnamed_addr constant [3 x i8] c"rb\00", align 1
+@.str.16 = private unnamed_addr constant [41 x i8] c"Data file %s is not found. Aborting... \0A\00", align 1
+@.str.17 = private unnamed_addr constant [23 x i8] c"tensor_data[%d] = %f \0A\00", align 1
+@.str.18 = private unnamed_addr constant [40 x i8] c"Data file %s is not found. Aborting...\0A\00", align 1
+@.str.19 = private unnamed_addr constant [28 x i8] c"--labels bytes_read = %zu \0A\00", align 1
+@.str.20 = private unnamed_addr constant [24 x i8] c"****** Accuracy = %f \0A\0A\00", align 1
+@.str.21 = private unnamed_addr constant [15 x i8] c"final_accuracy\00", align 1
+@.str.22 = private unnamed_addr constant [3 x i8] c"w+\00", align 1
+@.str.23 = private unnamed_addr constant [55 x i8] c"../../../../../../projects/hpvm-tensor-rt/model_params\00", align 1
+@.str.24 = private unnamed_addr constant [35 x i8] c"/FC_network2/mnist_float_input.bin\00", align 1
+@.str.25 = private unnamed_addr constant [23 x i8] c"/lenet_keras/conv1.bin\00", align 1
+@.str.26 = private unnamed_addr constant [28 x i8] c"/lenet_keras/conv1_bias.bin\00", align 1
+@.str.27 = private unnamed_addr constant [23 x i8] c"/lenet_keras/conv2.bin\00", align 1
+@.str.28 = private unnamed_addr constant [28 x i8] c"/lenet_keras/conv2_bias.bin\00", align 1
+@.str.29 = private unnamed_addr constant [31 x i8] c"Reading Input Data from = %s \0A\00", align 1
+@_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant { [5 x i8*], [5 x i8*] }
+@_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant [4 x i8*]
+@_ZTVSt9basic_iosIcSt11char_traitsIcEE = external unnamed_addr constant { [4 x i8*] }
+@_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE = external unnamed_addr constant { [16 x i8*] }
+@_ZTVSt15basic_streambufIcSt11char_traitsIcEE = external unnamed_addr constant { [16 x i8*] }
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_lenet.cpp, i8* null }]
+@str = private unnamed_addr constant [23 x i8] c"Successful cudaMalloc \00"
+
+declare void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #0
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr #2
+
+; Function Attrs: nounwind uwtable
+define void @_Z15printTensorInfoPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #3 { ; printTensorInfo(void*): prints the dimension count, the first two dimension sizes and the element count read from fixed byte offsets of %tensor_ptr.
+entry:
+  %gpu_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 40 ; byte offset 40 holds a pointer (named gpu_data here)
+  %0 = bitcast i8* %gpu_data to i8**
+  %1 = load i8*, i8** %0, align 8, !tbaa !4
+  %cmp = icmp eq i8* %1, null
+  br i1 %cmp, label %if.end, label %if.then ; only a non-null gpu_data pointer triggers the status message below
+
+if.then:                                          ; preds = %entry
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @str, i64 0, i64 0)) ; emits @str ("Successful cudaMalloc "); puts result is unused
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %dims = getelementptr inbounds i8, i8* %tensor_ptr, i64 64 ; byte offset 64: i32 dimension count
+  %num_dims = bitcast i8* %dims to i32*
+  %2 = load i32, i32* %num_dims, align 8, !tbaa !13
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.1, i64 0, i64 0), i32 %2) ; "tensor dims = %d"
+  %dim_sizes = getelementptr inbounds i8, i8* %tensor_ptr, i64 72 ; byte offset 72: pointer to an i64 array of dimension sizes
+  %3 = bitcast i8* %dim_sizes to i64**
+  %4 = load i64*, i64** %3, align 8, !tbaa !14
+  %5 = load i64, i64* %4, align 8, !tbaa !15 ; dim_sizes[0]
+  %call3 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.2, i64 0, i64 0), i64 %5) ; "dim1_size = %zu"
+  %6 = load i64*, i64** %3, align 8, !tbaa !14 ; array pointer is reloaded after the printf call
+  %arrayidx6 = getelementptr inbounds i64, i64* %6, i64 1
+  %7 = load i64, i64* %arrayidx6, align 8, !tbaa !15 ; dim_sizes[1] is read unconditionally, i.e. without consulting the dimension count loaded above
+  %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.3, i64 0, i64 0), i64 %7) ; "dim2_size = %zu"
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48 ; byte offset 48: i64 element count
+  %8 = bitcast i8* %num_elems to i64*
+  %9 = load i64, i64* %8, align 8, !tbaa !16
+  %call8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.4, i64 0, i64 0), i64 %9) ; "num_elems = %zu"
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #4
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #4
+
+; Function Attrs: nounwind uwtable
+define void @_Z17dumpWeightsToFilePcPv(i8* %file_name, i8* %weights_ptr) local_unnamed_addr #3 { ; dumpWeightsToFile(char*, void*): writes the tensor's host buffer to %file_name in binary mode; aborts the process if the file cannot be opened.
+entry:
+  tail call void @hpvm_request_tensor(i8* %weights_ptr, i32 0) #2 ; NOTE(review): presumably makes the host copy of the tensor current (second argument 0) before it is read below; confirm against the tensor runtime
+  %call = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.5, i64 0, i64 0)) ; mode string @.str.5 is "wb"
+  %cmp = icmp eq %struct._IO_FILE* %call, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @.str.6, i64 0, i64 0), i8* %file_name) ; fopen failed: report the file name, then abort
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %size_in_bytes = getelementptr inbounds i8, i8* %weights_ptr, i64 56 ; byte offset 56: i64 size of the tensor data in bytes
+  %0 = bitcast i8* %size_in_bytes to i64*
+  %1 = load i64, i64* %0, align 8, !tbaa !17
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.7, i64 0, i64 0), i64 %1) ; "size_in_bytes = %zu"
+  %host_data = getelementptr inbounds i8, i8* %weights_ptr, i64 32 ; byte offset 32: pointer to the buffer that gets written out
+  %2 = bitcast i8* %host_data to i8**
+  %3 = load i8*, i8** %2, align 8, !tbaa !18
+  %4 = load i64, i64* %0, align 8, !tbaa !17 ; size is reloaded after the printf call
+  %call4 = tail call i64 @fwrite(i8* %3, i64 1, i64 %4, %struct._IO_FILE* nonnull %call) ; element size 1, so the return value is a byte count
+  %call5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.8, i64 0, i64 0), i64 %call4) ; "bytes_written = %zu"; the count is only printed, not compared with the requested size
+  %call6 = tail call i32 @fclose(%struct._IO_FILE* nonnull %call) ; fclose result is ignored
+  ret void
+}
+
+declare void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+declare noalias %struct._IO_FILE* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #1
+
+; Function Attrs: noreturn nounwind
+declare void @abort() local_unnamed_addr #5
+
+; Function Attrs: nounwind
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i32 @fclose(%struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define void @_Z18fillTensorWithOnesPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !19
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !18
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !16
+  %cmp110 = icmp eq i64 %4, 0
+  br i1 %cmp110, label %if.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 8
+  br i1 %min.iters.check, label %for.body.preheader22, label %min.iters.checked
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %4, -8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader22, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add i64 %4, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader22, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1
+  %xtraiter = and i64 %12, 7
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body.prol ], [ %xtraiter, %vector.body.prol.preheader ]
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %14, align 4, !tbaa !20
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %16, align 4, !tbaa !20
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !22
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol.loopexit.unr-lcssa, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ]
+  %17 = icmp ult i64 %10, 56
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ]
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %19, align 4, !tbaa !20
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %21, align 4, !tbaa !20
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %23, align 4, !tbaa !20
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %25, align 4, !tbaa !20
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %27, align 4, !tbaa !20
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %29, align 4, !tbaa !20
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %31, align 4, !tbaa !20
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %33, align 4, !tbaa !20
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %35, align 4, !tbaa !20
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %37, align 4, !tbaa !20
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %39, align 4, !tbaa !20
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %41, align 4, !tbaa !20
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %43, align 4, !tbaa !20
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %45, align 4, !tbaa !20
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %47, align 4, !tbaa !20
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %49, align 4, !tbaa !20
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !24
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.prol.loopexit
+  %cmp.n = icmp eq i64 %4, %n.vec
+  br i1 %cmp.n, label %if.end, label %for.body.preheader22
+
+for.body.preheader22:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv12.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %i.011.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader22
+  %conv12 = phi i64 [ %conv, %for.body ], [ %conv12.ph, %for.body.preheader22 ]
+  %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader22 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv12
+  store float 1.000000e+00, float* %arrayidx, align 4, !tbaa !20
+  %inc = add i32 %i.011, 1
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %4
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !27
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z19fillWithOnesAndTwosPv(i8* %tensor_ptr) local_unnamed_addr #3 { ; fillWithOnesAndTwos(void*): if the i32 at offset 0 is 0, writes 1.0 to float indices [0, num_elems/2) and then 2.0 from that index up to num_elems
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2 ; external call; its effect is not visible in this function
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !19 ; i32 stored at byte offset 0 of the object behind %tensor_ptr
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end ; any non-zero value returns without storing anything
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !18 ; %2 = float* read from byte offset 32; base address of every store below
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !16 ; %4 = i64 element count read from byte offset 48
+  %div35 = lshr i64 %4, 1 ; %div35 = %4 / 2 (unsigned): number of elements that receive 1.0
+  %cmp136 = icmp eq i64 %div35, 0
+  br i1 %cmp136, label %for.cond.cleanup, label %for.body.preheader ; nothing to fill with 1.0 when the half count is zero
+
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 16 ; %4 < 16 means %div35 < 8, i.e. less than one 8-float vector step
+  br i1 %min.iters.check, label %for.body.preheader85, label %min.iters.checked
+
+for.body.preheader85:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv38.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] ; scalar 1.0 loop starts at 0, or at %n.vec when the vector loop already ran
+  %i.037.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ] ; same start value as an i32 counter
+  br label %for.body
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %div35, 9223372036854775800 ; mask is 0x7FFFFFFFFFFFFFF8: %div35 rounded down to a multiple of 8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader85, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add nsw i64 %div35, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1 ; low 32 bits of (%div35 - 1) are all ones
+  %8 = icmp ugt i64 %5, 4294967295 ; (%div35 - 1) does not fit in 32 bits
+  %9 = or i1 %7, %8 ; either condition means the i32 counter of the scalar loop would wrap
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader85, label %vector.body.preheader ; fall back to the scalar loop when the check fails
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add nsw i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1 ; %12 = %n.vec / 8 = number of 8-float vector steps
+  %xtraiter86 = and i64 %12, 7 ; steps left over after grouping into the 8x-unrolled main loop
+  %lcmp.mod87 = icmp eq i64 %xtraiter86, 0
+  br i1 %lcmp.mod87, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]
+  %prol.iter88 = phi i64 [ %prol.iter88.sub, %vector.body.prol ], [ %xtraiter86, %vector.body.prol.preheader ] ; counts down the %xtraiter86 prologue steps
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %14, align 4, !tbaa !20 ; floats [index, index+4) = 1.0
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %16, align 4, !tbaa !20 ; floats [index+4, index+8) = 1.0
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter88.sub = add i64 %prol.iter88, -1
+  %prol.iter88.cmp = icmp eq i64 %prol.iter88.sub, 0
+  br i1 %prol.iter88.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !28
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol.loopexit.unr-lcssa, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ] ; first index not yet written by the prologue
+  %17 = icmp ult i64 %10, 56 ; %n.vec < 64: fewer than 8 vector steps in total, so the prologue covered them all
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ] ; main loop: 8 unrolled steps of 8 floats, 64 floats of 1.0 per trip
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %19, align 4, !tbaa !20
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %21, align 4, !tbaa !20
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %23, align 4, !tbaa !20
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %25, align 4, !tbaa !20
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %27, align 4, !tbaa !20
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %29, align 4, !tbaa !20
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %31, align 4, !tbaa !20
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %33, align 4, !tbaa !20
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %35, align 4, !tbaa !20
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %37, align 4, !tbaa !20
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %39, align 4, !tbaa !20
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %41, align 4, !tbaa !20
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %43, align 4, !tbaa !20
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %45, align 4, !tbaa !20
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %47, align 4, !tbaa !20
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %49, align 4, !tbaa !20
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec ; leave once all %n.vec vectorized elements are written
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !29
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.prol.loopexit
+  %cmp.n = icmp eq i64 %div35, %n.vec ; true when %div35 is a multiple of 8, so no scalar remainder is left
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader85
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %middle.block, %if.then
+  %div.lcssa = phi i64 [ 0, %if.then ], [ %div35, %middle.block ], [ %div35, %for.cond.cleanup.loopexit ] ; 0 when the 1.0 phase was skipped, otherwise %div35
+  %conv731 = and i64 %div.lcssa, 4294967295 ; start index of the 2.0 phase: low 32 bits of the half count
+  %cmp932 = icmp ult i64 %conv731, %4
+  br i1 %cmp932, label %for.body11.lr.ph, label %if.end ; skip the 2.0 phase when the start index is not below %4
+
+for.body11.lr.ph:                                 ; preds = %for.cond.cleanup
+  %conv5 = trunc i64 %div.lcssa to i32 ; i32 counter start for the 2.0 phase
+  %51 = add nuw i64 %div.lcssa, 1
+  %52 = and i64 %51, 4294967295
+  %53 = icmp ugt i64 %4, %52
+  %umax = select i1 %53, i64 %4, i64 %52 ; unsigned max(%4, %52)
+  %54 = add i64 %umax, 1
+  %55 = sub i64 %54, %52 ; %55 = trip count of the 2.0 loop
+  %min.iters.check52 = icmp ult i64 %55, 8 ; fewer than 8 elements: not worth one vector step
+  br i1 %min.iters.check52, label %for.body11.preheader, label %min.iters.checked53
+
+for.body11.preheader:                             ; preds = %middle.block50, %vector.scevcheck65, %min.iters.checked53, %for.body11.lr.ph
+  %conv734.ph = phi i64 [ %conv731, %vector.scevcheck65 ], [ %conv731, %min.iters.checked53 ], [ %conv731, %for.body11.lr.ph ], [ %ind.end70, %middle.block50 ] ; scalar 2.0 loop starts at %conv731, or at %ind.end70 after the vector loop
+  %i2.033.ph = phi i32 [ %conv5, %vector.scevcheck65 ], [ %conv5, %min.iters.checked53 ], [ %conv5, %for.body11.lr.ph ], [ %ind.end73, %middle.block50 ] ; same start value as an i32 counter
+  br label %for.body11
+
+min.iters.checked53:                              ; preds = %for.body11.lr.ph
+  %n.vec55 = and i64 %55, -8 ; trip count rounded down to a multiple of 8
+  %cmp.zero56 = icmp eq i64 %n.vec55, 0
+  br i1 %cmp.zero56, label %for.body11.preheader, label %vector.scevcheck65
+
+vector.scevcheck65:                               ; preds = %min.iters.checked53
+  %56 = add i32 %conv5, 1
+  %57 = zext i32 %56 to i64
+  %58 = icmp ugt i64 %4, %57
+  %umax58 = select i1 %58, i64 %4, i64 %57
+  %59 = sub i64 %umax58, %57
+  %60 = trunc i64 %59 to i32
+  %61 = add i32 %56, %60
+  %62 = icmp ult i32 %61, %56 ; i32 addition wrapped
+  %63 = icmp ugt i64 %59, 4294967295 ; distance does not fit in 32 bits
+  %64 = or i1 %62, %63
+  %65 = trunc i64 %59 to i32
+  %66 = add i32 %conv5, %65
+  %67 = icmp ult i32 %66, %conv5 ; same wrap test, based at %conv5 instead of %conv5 + 1
+  %68 = icmp ugt i64 %59, 4294967295
+  %69 = or i1 %67, %68
+  %70 = or i1 %64, %69 ; any failed check selects the scalar loop instead of the vector loop
+  %ind.end70 = add i64 %conv731, %n.vec55 ; index just past the vectorized range
+  %cast.crd72 = trunc i64 %n.vec55 to i32
+  %ind.end73 = add i32 %conv5, %cast.crd72 ; i32 counter value just past the vectorized range
+  br i1 %70, label %for.body11.preheader, label %vector.body49.preheader
+
+vector.body49.preheader:                          ; preds = %vector.scevcheck65
+  %71 = add i64 %n.vec55, -8
+  %72 = lshr exact i64 %71, 3
+  %73 = add nuw nsw i64 %72, 1 ; %73 = %n.vec55 / 8 = number of 8-float vector steps
+  %xtraiter = and i64 %73, 3 ; steps left over after grouping into the 4x-unrolled main loop
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body49.prol.loopexit, label %vector.body49.prol.preheader
+
+vector.body49.prol.preheader:                     ; preds = %vector.body49.preheader
+  br label %vector.body49.prol
+
+vector.body49.prol:                               ; preds = %vector.body49.prol, %vector.body49.prol.preheader
+  %index67.prol = phi i64 [ %index.next68.prol, %vector.body49.prol ], [ 0, %vector.body49.prol.preheader ]
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body49.prol ], [ %xtraiter, %vector.body49.prol.preheader ] ; counts down the %xtraiter prologue steps
+  %74 = add i64 %conv731, %index67.prol ; element index = phase start + offset within the phase
+  %75 = getelementptr inbounds float, float* %2, i64 %74
+  %76 = bitcast float* %75 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %76, align 4, !tbaa !20
+  %77 = getelementptr float, float* %75, i64 4
+  %78 = bitcast float* %77 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %78, align 4, !tbaa !20
+  %index.next68.prol = add i64 %index67.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body49.prol.loopexit.unr-lcssa, label %vector.body49.prol, !llvm.loop !30
+
+vector.body49.prol.loopexit.unr-lcssa:            ; preds = %vector.body49.prol
+  br label %vector.body49.prol.loopexit
+
+vector.body49.prol.loopexit:                      ; preds = %vector.body49.prol.loopexit.unr-lcssa, %vector.body49.preheader
+  %index67.unr = phi i64 [ 0, %vector.body49.preheader ], [ %index.next68.prol, %vector.body49.prol.loopexit.unr-lcssa ] ; first offset not yet written by the prologue
+  %79 = icmp ult i64 %71, 24 ; %n.vec55 < 32: fewer than 4 vector steps in total, so the prologue covered them all
+  br i1 %79, label %middle.block50, label %vector.body49.preheader.new
+
+vector.body49.preheader.new:                      ; preds = %vector.body49.prol.loopexit
+  br label %vector.body49
+
+vector.body49:                                    ; preds = %vector.body49, %vector.body49.preheader.new
+  %index67 = phi i64 [ %index67.unr, %vector.body49.preheader.new ], [ %index.next68.3, %vector.body49 ] ; main loop: 4 unrolled steps of 8 floats, 32 floats of 2.0 per trip
+  %80 = add i64 %conv731, %index67
+  %81 = getelementptr inbounds float, float* %2, i64 %80
+  %82 = bitcast float* %81 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %82, align 4, !tbaa !20
+  %83 = getelementptr float, float* %81, i64 4
+  %84 = bitcast float* %83 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %84, align 4, !tbaa !20
+  %index.next68 = add i64 %index67, 8
+  %85 = add i64 %conv731, %index.next68
+  %86 = getelementptr inbounds float, float* %2, i64 %85
+  %87 = bitcast float* %86 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %87, align 4, !tbaa !20
+  %88 = getelementptr float, float* %86, i64 4
+  %89 = bitcast float* %88 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %89, align 4, !tbaa !20
+  %index.next68.1 = add i64 %index67, 16
+  %90 = add i64 %conv731, %index.next68.1
+  %91 = getelementptr inbounds float, float* %2, i64 %90
+  %92 = bitcast float* %91 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %92, align 4, !tbaa !20
+  %93 = getelementptr float, float* %91, i64 4
+  %94 = bitcast float* %93 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %94, align 4, !tbaa !20
+  %index.next68.2 = add i64 %index67, 24
+  %95 = add i64 %conv731, %index.next68.2
+  %96 = getelementptr inbounds float, float* %2, i64 %95
+  %97 = bitcast float* %96 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %97, align 4, !tbaa !20
+  %98 = getelementptr float, float* %96, i64 4
+  %99 = bitcast float* %98 to <4 x float>*
+  store <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <4 x float>* %99, align 4, !tbaa !20
+  %index.next68.3 = add i64 %index67, 32
+  %100 = icmp eq i64 %index.next68.3, %n.vec55 ; leave once all %n.vec55 vectorized elements are written
+  br i1 %100, label %middle.block50.unr-lcssa, label %vector.body49, !llvm.loop !31
+
+middle.block50.unr-lcssa:                         ; preds = %vector.body49
+  br label %middle.block50
+
+middle.block50:                                   ; preds = %middle.block50.unr-lcssa, %vector.body49.prol.loopexit
+  %cmp.n74 = icmp eq i64 %55, %n.vec55 ; true when the trip count is a multiple of 8, so no scalar remainder is left
+  br i1 %cmp.n74, label %if.end, label %for.body11.preheader
+
+for.body:                                         ; preds = %for.body, %for.body.preheader85
+  %conv38 = phi i64 [ %conv, %for.body ], [ %conv38.ph, %for.body.preheader85 ] ; scalar 1.0 loop: one float per trip
+  %i.037 = phi i32 [ %inc, %for.body ], [ %i.037.ph, %for.body.preheader85 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv38
+  store float 1.000000e+00, float* %arrayidx, align 4, !tbaa !20
+  %inc = add i32 %i.037, 1 ; counter is 32-bit and is zero-extended before the bound compare
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %div35 ; continue while the index is below the half count
+  br i1 %cmp1, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !32
+
+for.body11:                                       ; preds = %for.body11, %for.body11.preheader
+  %conv734 = phi i64 [ %conv7, %for.body11 ], [ %conv734.ph, %for.body11.preheader ] ; scalar 2.0 loop: one float per trip
+  %i2.033 = phi i32 [ %inc15, %for.body11 ], [ %i2.033.ph, %for.body11.preheader ]
+  %arrayidx13 = getelementptr inbounds float, float* %2, i64 %conv734
+  store float 2.000000e+00, float* %arrayidx13, align 4, !tbaa !20
+  %inc15 = add i32 %i2.033, 1 ; counter is 32-bit and is zero-extended before the bound compare
+  %conv7 = zext i32 %inc15 to i64
+  %cmp9 = icmp ult i64 %conv7, %4 ; continue while the index is below the full element count
+  br i1 %cmp9, label %for.body11, label %if.end.loopexit, !llvm.loop !33
+
+if.end.loopexit:                                  ; preds = %for.body11
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block50, %for.cond.cleanup, %entry
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z21fillTensorWithNegOnesPv(i8* %tensor_ptr) local_unnamed_addr #3 { ; fillTensorWithNegOnes(void*): if the i32 at offset 0 is 0, writes -1.0 to float indices [0, num_elems) of the buffer loaded from offset 32
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2 ; external call; its effect is not visible in this function
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !19 ; i32 stored at byte offset 0 of the object behind %tensor_ptr
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end ; any non-zero value returns without storing anything
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !18 ; %2 = float* read from byte offset 32; base address of every store below
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !16 ; %4 = i64 element count read from byte offset 48
+  %cmp110 = icmp eq i64 %4, 0
+  br i1 %cmp110, label %if.end, label %for.body.preheader ; nothing to write for an element count of zero
+
+for.body.preheader:                               ; preds = %if.then
+  %min.iters.check = icmp ult i64 %4, 8 ; fewer than 8 elements: not worth one vector step
+  br i1 %min.iters.check, label %for.body.preheader22, label %min.iters.checked
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %4, -8 ; element count rounded down to a multiple of 8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader22, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  %5 = add i64 %4, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1 ; low 32 bits of (%4 - 1) are all ones
+  %8 = icmp ugt i64 %5, 4294967295 ; (%4 - 1) does not fit in 32 bits
+  %9 = or i1 %7, %8 ; either condition means the i32 counter of the scalar loop would wrap
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader22, label %vector.body.preheader ; fall back to the scalar loop when the check fails
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = add nuw nsw i64 %11, 1 ; %12 = %n.vec / 8 = number of 8-float vector steps
+  %xtraiter = and i64 %12, 7 ; steps left over after grouping into the 8x-unrolled main loop
+  %lcmp.mod = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod, label %vector.body.prol.loopexit, label %vector.body.prol.preheader
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.prol.preheader
+  %index.prol = phi i64 [ %index.next.prol, %vector.body.prol ], [ 0, %vector.body.prol.preheader ]
+  %prol.iter = phi i64 [ %prol.iter.sub, %vector.body.prol ], [ %xtraiter, %vector.body.prol.preheader ] ; counts down the %xtraiter prologue steps
+  %13 = getelementptr inbounds float, float* %2, i64 %index.prol
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %14, align 4, !tbaa !20 ; floats [index, index+4) = -1.0
+  %15 = getelementptr float, float* %13, i64 4
+  %16 = bitcast float* %15 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %16, align 4, !tbaa !20 ; floats [index+4, index+8) = -1.0
+  %index.next.prol = add i64 %index.prol, 8
+  %prol.iter.sub = add i64 %prol.iter, -1
+  %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+  br i1 %prol.iter.cmp, label %vector.body.prol.loopexit.unr-lcssa, label %vector.body.prol, !llvm.loop !34
+
+vector.body.prol.loopexit.unr-lcssa:              ; preds = %vector.body.prol
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol.loopexit.unr-lcssa, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ %index.next.prol, %vector.body.prol.loopexit.unr-lcssa ] ; first index not yet written by the prologue
+  %17 = icmp ult i64 %10, 56 ; %n.vec < 64: fewer than 8 vector steps in total, so the prologue covered them all
+  br i1 %17, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.7, %vector.body ] ; main loop: 8 unrolled steps of 8 floats, 64 floats of -1.0 per trip
+  %18 = getelementptr inbounds float, float* %2, i64 %index
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %19, align 4, !tbaa !20
+  %20 = getelementptr float, float* %18, i64 4
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %21, align 4, !tbaa !20
+  %index.next = add i64 %index, 8
+  %22 = getelementptr inbounds float, float* %2, i64 %index.next
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %23, align 4, !tbaa !20
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %25, align 4, !tbaa !20
+  %index.next.1 = add i64 %index, 16
+  %26 = getelementptr inbounds float, float* %2, i64 %index.next.1
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %27, align 4, !tbaa !20
+  %28 = getelementptr float, float* %26, i64 4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %29, align 4, !tbaa !20
+  %index.next.2 = add i64 %index, 24
+  %30 = getelementptr inbounds float, float* %2, i64 %index.next.2
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %31, align 4, !tbaa !20
+  %32 = getelementptr float, float* %30, i64 4
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %33, align 4, !tbaa !20
+  %index.next.3 = add i64 %index, 32
+  %34 = getelementptr inbounds float, float* %2, i64 %index.next.3
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %35, align 4, !tbaa !20
+  %36 = getelementptr float, float* %34, i64 4
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %37, align 4, !tbaa !20
+  %index.next.4 = add i64 %index, 40
+  %38 = getelementptr inbounds float, float* %2, i64 %index.next.4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %39, align 4, !tbaa !20
+  %40 = getelementptr float, float* %38, i64 4
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %41, align 4, !tbaa !20
+  %index.next.5 = add i64 %index, 48
+  %42 = getelementptr inbounds float, float* %2, i64 %index.next.5
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %43, align 4, !tbaa !20
+  %44 = getelementptr float, float* %42, i64 4
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %45, align 4, !tbaa !20
+  %index.next.6 = add i64 %index, 56
+  %46 = getelementptr inbounds float, float* %2, i64 %index.next.6
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %47, align 4, !tbaa !20
+  %48 = getelementptr float, float* %46, i64 4
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <4 x float>* %49, align 4, !tbaa !20
+  %index.next.7 = add i64 %index, 64
+  %50 = icmp eq i64 %index.next.7, %n.vec ; leave once all %n.vec vectorized elements are written
+  br i1 %50, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !35
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.prol.loopexit
+  %cmp.n = icmp eq i64 %4, %n.vec ; true when the element count is a multiple of 8, so no scalar remainder is left
+  br i1 %cmp.n, label %if.end, label %for.body.preheader22
+
+for.body.preheader22:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  %conv12.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] ; scalar loop starts at 0, or at %n.vec when the vector loop already ran
+  %i.011.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ] ; same start value as an i32 counter
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader22
+  %conv12 = phi i64 [ %conv, %for.body ], [ %conv12.ph, %for.body.preheader22 ] ; scalar loop: one float per trip
+  %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader22 ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv12
+  store float -1.000000e+00, float* %arrayidx, align 4, !tbaa !20
+  %inc = add i32 %i.011, 1 ; counter is 32-bit and is zero-extended before the bound compare
+  %conv = zext i32 %inc to i64
+  %cmp1 = icmp ult i64 %conv, %4 ; continue while the index is below the element count
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !36
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; fillTensorVals(void* tensor_ptr)   [mangled: _Z14fillTensorValsPv]
+;
+; If the i32 at byte offset 0 of the tensor (%data_type) is 0, writes
+; host_data[i] = (float)(i + 1) for every i in [0, num_elems), where host_data
+; is the float* loaded from offset 32 and num_elems is the i64 loaded from
+; offset 48. For any other %data_type value, or when num_elems == 0, it
+; returns without writing anything.
+;
+; The same loop appears three times: one peeled vector iteration
+; (vector.body.prol), a vector loop unrolled by two that stores 16 floats per
+; trip (vector.body), and the scalar loop (for.body) that handles the
+; remainder and every case where the vector preconditions fail.
+;
+; NOTE(review): the scalar loop counts in i32 and compares the zero-extended
+; counter against the i64 num_elems, so it cannot terminate when num_elems is
+; 2^32 or larger. Presumably inherited from a 32-bit counter in the C++
+; source -- confirm there.
+; Function Attrs: norecurse nounwind uwtable
+define void @_Z14fillTensorValsPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #6 {
+entry:
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !19
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !18
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !16
+  %cmp111 = icmp eq i64 %4, 0
+  br i1 %cmp111, label %if.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.then
+  ; Fewer than 8 elements: not worth vectorizing, go straight to the scalar loop.
+  %min.iters.check = icmp ult i64 %4, 8
+  br i1 %min.iters.check, label %for.body.preheader23, label %min.iters.checked
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  ; %n.vec = num_elems rounded down to a multiple of 8 (elements per vector step).
+  %n.vec = and i64 %4, -8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader23, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %min.iters.checked
+  ; Take the vector path only when num_elems - 1 < 2^32 - 1, i.e. when the
+  ; 32-bit counter used by the original loop cannot wrap.
+  %5 = add i64 %4, -1
+  %6 = trunc i64 %5 to i32
+  %7 = icmp eq i32 %6, -1
+  %8 = icmp ugt i64 %5, 4294967295
+  %9 = or i1 %7, %8
+  %cast.crd = trunc i64 %n.vec to i32
+  br i1 %9, label %for.body.preheader23, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %vector.scevcheck
+  ; %11 = (number of 8-element steps) - 1. When it is even the step count is
+  ; odd, so one step is peeled before entering the 2x-unrolled loop.
+  %10 = add i64 %n.vec, -8
+  %11 = lshr exact i64 %10, 3
+  %12 = and i64 %11, 1
+  %lcmp.mod = icmp eq i64 %12, 0
+  br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol.preheader
+  ; Peeled first step: elements 0..7 receive the constants 1.0 .. 8.0.
+  %13 = bitcast float* %2 to <4 x float>*
+  store <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, <4 x float>* %13, align 4, !tbaa !20
+  %14 = getelementptr float, float* %2, i64 4
+  %15 = bitcast float* %14 to <4 x float>*
+  store <4 x float> <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>, <4 x float>* %15, align 4, !tbaa !20
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ 8, %vector.body.prol ]
+  %16 = icmp eq i64 %11, 0
+  br i1 %16, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  ; Two unrolled 8-element steps: each lane value is (i32)index + lane + 1,
+  ; converted with uitofp and stored at data[index + lane].
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+  %17 = trunc i64 %index to i32
+  %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %17, i32 0
+  %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer
+  %18 = add <4 x i32> %broadcast.splat20, <i32 1, i32 2, i32 3, i32 4>
+  %19 = add <4 x i32> %broadcast.splat20, <i32 5, i32 6, i32 7, i32 8>
+  %20 = uitofp <4 x i32> %18 to <4 x float>
+  %21 = uitofp <4 x i32> %19 to <4 x float>
+  %22 = getelementptr inbounds float, float* %2, i64 %index
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> %20, <4 x float>* %23, align 4, !tbaa !20
+  %24 = getelementptr float, float* %22, i64 4
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> %21, <4 x float>* %25, align 4, !tbaa !20
+  %index.next = add i64 %index, 8
+  %26 = trunc i64 %index.next to i32
+  %broadcast.splatinsert19.1 = insertelement <4 x i32> undef, i32 %26, i32 0
+  %broadcast.splat20.1 = shufflevector <4 x i32> %broadcast.splatinsert19.1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %27 = add <4 x i32> %broadcast.splat20.1, <i32 1, i32 2, i32 3, i32 4>
+  %28 = add <4 x i32> %broadcast.splat20.1, <i32 5, i32 6, i32 7, i32 8>
+  %29 = uitofp <4 x i32> %27 to <4 x float>
+  %30 = uitofp <4 x i32> %28 to <4 x float>
+  %31 = getelementptr inbounds float, float* %2, i64 %index.next
+  %32 = bitcast float* %31 to <4 x float>*
+  store <4 x float> %29, <4 x float>* %32, align 4, !tbaa !20
+  %33 = getelementptr float, float* %31, i64 4
+  %34 = bitcast float* %33 to <4 x float>*
+  store <4 x float> %30, <4 x float>* %34, align 4, !tbaa !20
+  %index.next.1 = add i64 %index, 16
+  %35 = icmp eq i64 %index.next.1, %n.vec
+  br i1 %35, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !37
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.prol.loopexit
+  ; Done if num_elems was a multiple of 8; otherwise finish the tail in scalar code.
+  %cmp.n = icmp eq i64 %4, %n.vec
+  br i1 %cmp.n, label %if.end, label %for.body.preheader23
+
+for.body.preheader23:                             ; preds = %middle.block, %vector.scevcheck, %min.iters.checked, %for.body.preheader
+  ; Scalar loop starts at 0, or at %n.vec when the vector loop already ran.
+  %conv13.ph = phi i64 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %i.012.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %cast.crd, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader23
+  ; data[i] = (float)(i + 1); %conv13 is the i64 view of the i32 counter %i.012.
+  %conv13 = phi i64 [ %conv, %for.body ], [ %conv13.ph, %for.body.preheader23 ]
+  %i.012 = phi i32 [ %add, %for.body ], [ %i.012.ph, %for.body.preheader23 ]
+  %add = add i32 %i.012, 1
+  %conv2 = uitofp i32 %add to float
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv13
+  store float %conv2, float* %arrayidx, align 4, !tbaa !20
+  %conv = zext i32 %add to i64
+  %cmp1 = icmp ult i64 %conv, %4
+  br i1 %cmp1, label %for.body, label %if.end.loopexit, !llvm.loop !38
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %middle.block, %if.then, %entry
+  ret void
+}
+
+; printTensorValues(void* tensor_ptr)   [mangled: _Z17printTensorValuesPv]
+;
+; Calls hpvm_request_tensor(tensor_ptr, 0) first. If the i32 at offset 0
+; (%data_type) is 0, prints every element of the float array at offset 32
+; (host_data) with printf and the format string @.str.9, each value widened
+; to double. In all cases it finishes with putchar(10), i.e. a newline.
+;
+; The element count (i64 at offset 48) is re-loaded from the tensor on every
+; iteration rather than cached. The loop counter is i32 and is zero-extended
+; before being compared against that i64 count.
+; Function Attrs: nounwind uwtable
+define void @_Z17printTensorValuesPv(i8* %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %data_type = bitcast i8* %tensor_ptr to i32*
+  %0 = load i32, i32* %data_type, align 8, !tbaa !19
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %1 = bitcast i8* %host_data to float**
+  %2 = load float*, float** %1, align 8, !tbaa !18
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %3 = bitcast i8* %num_elems to i64*
+  %4 = load i64, i64* %3, align 8, !tbaa !16
+  %cmp112 = icmp eq i64 %4, 0
+  br i1 %cmp112, label %if.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.then
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %conv14 = phi i64 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+  %i.013 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %2, i64 %conv14
+  %5 = load float, float* %arrayidx, align 4, !tbaa !20
+  ; Varargs printf takes a double, hence the fpext.
+  %conv2 = fpext float %5 to double
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.9, i64 0, i64 0), double %conv2)
+  %inc = add i32 %i.013, 1
+  %conv = zext i32 %inc to i64
+  ; num_elems is read again here instead of reusing %4.
+  %6 = load i64, i64* %3, align 8, !tbaa !16
+  %cmp1 = icmp ult i64 %conv, %6
+  br i1 %cmp1, label %for.body, label %if.end.loopexit
+
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %if.then, %entry
+  ; 10 is the ASCII code for '\n'.
+  %putchar = tail call i32 @putchar(i32 10) #2
+  ret void
+}
+
+; printTensorDims(void* tensor_ptr)   [mangled: _Z15printTensorDimsPv]
+;
+; Prints the i64 at offset 48 (num_elems) with format @.str.11, then, for each
+; index i in [0, num_dims), prints the pair (i, dim_sizes[i]) with format
+; @.str.12. num_dims is the i32 at offset 64 and dim_sizes is the i64* at
+; offset 72. Nothing is printed for the dimensions when num_dims <= 0.
+;
+; Both the dim_sizes pointer and num_dims are re-loaded from the tensor on
+; every iteration.
+; Function Attrs: nounwind uwtable
+define void @_Z15printTensorDimsPv(i8* nocapture readonly %tensor_ptr) local_unnamed_addr #3 {
+entry:
+  %num_elems = getelementptr inbounds i8, i8* %tensor_ptr, i64 48
+  %0 = bitcast i8* %num_elems to i64*
+  %1 = load i64, i64* %0, align 8, !tbaa !16
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.11, i64 0, i64 0), i64 %1)
+  %dims = getelementptr inbounds i8, i8* %tensor_ptr, i64 64
+  %num_dims = bitcast i8* %dims to i32*
+  %2 = load i32, i32* %num_dims, align 8, !tbaa !13
+  %cmp10 = icmp sgt i32 %2, 0
+  br i1 %cmp10, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %dim_sizes = getelementptr inbounds i8, i8* %tensor_ptr, i64 72
+  %3 = bitcast i8* %dim_sizes to i64**
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %4 = load i64*, i64** %3, align 8, !tbaa !14
+  %arrayidx = getelementptr inbounds i64, i64* %4, i64 %indvars.iv
+  %5 = load i64, i64* %arrayidx, align 8, !tbaa !15
+  ; The index is printed as a 32-bit value, the dimension size as 64-bit.
+  %6 = trunc i64 %indvars.iv to i32
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.12, i64 0, i64 0), i32 %6, i64 %5)
+  %indvars.iv.next = add nuw i64 %indvars.iv, 1
+  %7 = load i32, i32* %num_dims, align 8, !tbaa !13
+  %8 = sext i32 %7 to i64
+  %cmp = icmp slt i64 %indvars.iv.next, %8
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+; compareTensors(void* tensor1_ptr, void* tensor2_ptr)
+;   [mangled: _Z14compareTensorsPvS_]
+;
+; Calls hpvm_request_tensor(ptr, 0) on both tensors, then compares their float
+; arrays (the float* at offset 32 of each) element by element. On the first
+; pair that differs it calls printf with format @.str.13 and the element
+; index, then abort(). It returns normally only if every pair compares equal.
+;
+; NOTE(review): the loop bound is the i64 at offset 48 of tensor1 only; the
+; element count of tensor2 is never read, and neither tensor's data_type
+; field is checked before the buffers are treated as float.
+; NOTE(review): the comparison is `fcmp fast une`, so fast-math assumptions
+; (e.g. no NaNs) apply to it.
+; Function Attrs: nounwind uwtable
+define void @_Z14compareTensorsPvS_(i8* %tensor1_ptr, i8* %tensor2_ptr) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor1_ptr, i32 0) #2
+  tail call void @hpvm_request_tensor(i8* %tensor2_ptr, i32 0) #2
+  %host_data = getelementptr inbounds i8, i8* %tensor1_ptr, i64 32
+  %0 = bitcast i8* %host_data to float**
+  %1 = load float*, float** %0, align 8, !tbaa !18
+  %host_data1 = getelementptr inbounds i8, i8* %tensor2_ptr, i64 32
+  %2 = bitcast i8* %host_data1 to float**
+  %3 = load float*, float** %2, align 8, !tbaa !18
+  %num_elems = getelementptr inbounds i8, i8* %tensor1_ptr, i64 48
+  %4 = bitcast i8* %num_elems to i64*
+  %5 = load i64, i64* %4, align 8, !tbaa !16
+  %cmp17 = icmp eq i64 %5, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.inc, %for.body.preheader
+  %conv19 = phi i64 [ %conv, %for.inc ], [ 0, %for.body.preheader ]
+  %i.018 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %1, i64 %conv19
+  %6 = load float, float* %arrayidx, align 4, !tbaa !20
+  %arrayidx3 = getelementptr inbounds float, float* %3, i64 %conv19
+  %7 = load float, float* %arrayidx3, align 4, !tbaa !20
+  %cmp4 = fcmp fast une float %6, %7
+  br i1 %cmp4, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  ; Mismatch: report the index and terminate the process.
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.13, i64 0, i64 0), i32 %i.018)
+  tail call void @abort() #8
+  unreachable
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.018, 1
+  %conv = zext i32 %inc to i64
+  %cmp = icmp ult i64 %conv, %5
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+; compareValues(void* tensor_ptr, float* data, unsigned long num_elems)
+;   [mangled: _Z13compareValuesPvPfm]
+;
+; Calls hpvm_request_tensor(tensor_ptr, 0), then checks that the tensor's
+; float array (the float* at offset 32) equals data[0 .. num_elems). On the
+; first differing element it calls printf with @.str.14 (no arguments) and
+; then abort(). It returns normally if num_elems is 0 or all elements match.
+;
+; The loop counter is i32 and is zero-extended before the comparison against
+; the i64 %num_elems.
+; Function Attrs: nounwind uwtable
+define void @_Z13compareValuesPvPfm(i8* %tensor_ptr, float* nocapture readonly %data, i64 %num_elems) local_unnamed_addr #3 {
+entry:
+  tail call void @hpvm_request_tensor(i8* %tensor_ptr, i32 0) #2
+  %host_data = getelementptr inbounds i8, i8* %tensor_ptr, i64 32
+  %0 = bitcast i8* %host_data to float**
+  %1 = load float*, float** %0, align 8, !tbaa !18
+  %cmp11 = icmp eq i64 %num_elems, 0
+  br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  ; Loop latch: continue while the next index is still below num_elems.
+  %conv = zext i32 %inc to i64
+  %cmp = icmp ult i64 %conv, %num_elems
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.cond, %for.body.preheader
+  %conv13 = phi i64 [ %conv, %for.cond ], [ 0, %for.body.preheader ]
+  %i.012 = phi i32 [ %inc, %for.cond ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %1, i64 %conv13
+  %2 = load float, float* %arrayidx, align 4, !tbaa !20
+  %arrayidx2 = getelementptr inbounds float, float* %data, i64 %conv13
+  %3 = load float, float* %arrayidx2, align 4, !tbaa !20
+  %cmp3 = fcmp fast une float %2, %3
+  %inc = add i32 %i.012, 1
+  br i1 %cmp3, label %if.then, label %for.cond
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0))
+  tail call void @abort() #8
+  unreachable
+}
+
+; readInputTensor(const char* file_name, int data_type, int dim1_size,
+;                 int dim2_size, int dim3_size, int dim4_size)
+;   [mangled: _Z15readInputTensorPKciiiii]
+;
+; Steps, in execution order:
+;   1. num_elems = dim1*dim2*dim3*dim4 (%mul2) and size_in_bytes =
+;      4*dim1*dim2*dim3*dim4 (%mul6), both computed in i32.
+;   2. malloc a num_elems-byte staging buffer (%call) and a 4*num_elems-byte
+;      float buffer (%call10).
+;   3. fopen(file_name, @.str.15); on a null result, printf(@.str.16,
+;      file_name) and abort().
+;   4. fseek(file, 16, 1), then fread num_elems single bytes into %call.
+;   5. Convert every byte to float (uitofp) and multiply it by
+;      0x3F70101020000000 (approximately 1/255), storing into %call10.
+;   6. printf(@.str.17, 10, <float element 10 of %call10 widened to double>).
+;   7. create4DTensor(data_type, 0, dim1, dim2, dim3, dim4),
+;      initTensorData(tensor, %call10, size_in_bytes),
+;      hpvm_request_tensor(tensor, 0).
+;   8. Compare the tensor's float array (offset 32) against %call10 element by
+;      element; printf(@.str.14) and abort() on the first mismatch, otherwise
+;      return the tensor pointer. The `.i` suffixes and the
+;      `_Z13compareValuesPvPfm.exit` labels suggest this is an inlined copy of
+;      compareValues.
+;
+; NOTE(review), all visible in this function body:
+;   - neither malloc result is checked for null and the fread result is unused;
+;   - the FILE* from fopen is never closed, and %call is never freed;
+;   - step 6 always reads element 10: when num_elems == 0 the printed value is
+;     `undef`, and for 0 < num_elems < 11 the load at byte offset 40 is past
+;     the end of the 4*num_elems-byte allocation.
+;   - whence value 1 in fseek is presumably SEEK_CUR -- confirm against the
+;     C++ source.
+; Function Attrs: nounwind uwtable
+define i8* @_Z15readInputTensorPKciiiii(i8* %file_name, i32 %data_type, i32 %dim1_size, i32 %dim2_size, i32 %dim3_size, i32 %dim4_size) local_unnamed_addr #3 {
+entry:
+  %mul = mul nsw i32 %dim2_size, %dim1_size
+  %mul1 = mul nsw i32 %mul, %dim3_size
+  %mul2 = mul nsw i32 %mul1, %dim4_size
+  %mul3 = shl i32 %dim1_size, 2
+  %mul4 = mul nsw i32 %mul3, %dim2_size
+  %mul5 = mul nsw i32 %mul4, %dim3_size
+  %mul6 = mul nsw i32 %mul5, %dim4_size
+  %conv = sext i32 %mul2 to i64
+  %call = tail call noalias i8* @malloc(i64 %conv) #2
+  %mul9 = shl nsw i64 %conv, 2
+  %call10 = tail call noalias i8* @malloc(i64 %mul9) #2
+  %0 = bitcast i8* %call10 to float*
+  %call11 = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0))
+  %cmp = icmp eq %struct._IO_FILE* %call11, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call12 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.16, i64 0, i64 0), i8* %file_name)
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ; Skip 16 bytes from the current position, then read one byte per element.
+  %call14 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call11, i64 16, i32 1)
+  %call17 = tail call i64 @fread(i8* %call, i64 1, i64 %conv, %struct._IO_FILE* nonnull %call11)
+  %cmp1966 = icmp eq i32 %mul2, 0
+  br i1 %cmp1966, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %if.end
+  ; %umax = max(num_elems, 1) as unsigned: the trip count of the conversion loop.
+  %1 = icmp ugt i64 %conv, 1
+  %umax = select i1 %1, i64 %conv, i64 1
+  %min.iters.check = icmp ult i64 %umax, 8
+  br i1 %min.iters.check, label %for.body.preheader72, label %min.iters.checked
+
+for.body.preheader72:                             ; preds = %middle.block, %min.iters.checked, %for.body.preheader
+  %i.067.ph = phi i64 [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  br label %for.body
+
+min.iters.checked:                                ; preds = %for.body.preheader
+  %n.vec = and i64 %umax, -8
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader72, label %vector.body.preheader
+
+vector.body.preheader:                            ; preds = %min.iters.checked
+  ; %3 = (number of 8-element steps) - 1; an odd step count peels one step.
+  %2 = add nsw i64 %n.vec, -8
+  %3 = lshr exact i64 %2, 3
+  %4 = and i64 %3, 1
+  %lcmp.mod = icmp eq i64 %4, 0
+  br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit
+
+vector.body.prol.preheader:                       ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol:                                 ; preds = %vector.body.prol.preheader
+  ; Peeled step: convert and scale bytes 0..7 into floats 0..7.
+  %5 = bitcast i8* %call to <4 x i8>*
+  %wide.load.prol = load <4 x i8>, <4 x i8>* %5, align 1, !tbaa !39
+  %6 = getelementptr i8, i8* %call, i64 4
+  %7 = bitcast i8* %6 to <4 x i8>*
+  %wide.load71.prol = load <4 x i8>, <4 x i8>* %7, align 1, !tbaa !39
+  %8 = uitofp <4 x i8> %wide.load.prol to <4 x float>
+  %9 = uitofp <4 x i8> %wide.load71.prol to <4 x float>
+  %10 = fmul fast <4 x float> %8, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %11 = fmul fast <4 x float> %9, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %12 = bitcast i8* %call10 to <4 x float>*
+  store <4 x float> %10, <4 x float>* %12, align 4, !tbaa !20
+  %13 = getelementptr i8, i8* %call10, i64 16
+  %14 = bitcast i8* %13 to <4 x float>*
+  store <4 x float> %11, <4 x float>* %14, align 4, !tbaa !20
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit:                        ; preds = %vector.body.prol, %vector.body.preheader
+  %index.unr = phi i64 [ 0, %vector.body.preheader ], [ 8, %vector.body.prol ]
+  %15 = icmp eq i64 %3, 0
+  br i1 %15, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new:                        ; preds = %vector.body.prol.loopexit
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.body.preheader.new
+  ; Two unrolled 8-element steps of the byte -> scaled float conversion.
+  %index = phi i64 [ %index.unr, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+  %16 = getelementptr inbounds i8, i8* %call, i64 %index
+  %17 = bitcast i8* %16 to <4 x i8>*
+  %wide.load = load <4 x i8>, <4 x i8>* %17, align 1, !tbaa !39
+  %18 = getelementptr i8, i8* %16, i64 4
+  %19 = bitcast i8* %18 to <4 x i8>*
+  %wide.load71 = load <4 x i8>, <4 x i8>* %19, align 1, !tbaa !39
+  %20 = uitofp <4 x i8> %wide.load to <4 x float>
+  %21 = uitofp <4 x i8> %wide.load71 to <4 x float>
+  %22 = fmul fast <4 x float> %20, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %23 = fmul fast <4 x float> %21, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %24 = getelementptr inbounds float, float* %0, i64 %index
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> %22, <4 x float>* %25, align 4, !tbaa !20
+  %26 = getelementptr float, float* %24, i64 4
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> %23, <4 x float>* %27, align 4, !tbaa !20
+  %index.next = add i64 %index, 8
+  %28 = getelementptr inbounds i8, i8* %call, i64 %index.next
+  %29 = bitcast i8* %28 to <4 x i8>*
+  %wide.load.1 = load <4 x i8>, <4 x i8>* %29, align 1, !tbaa !39
+  %30 = getelementptr i8, i8* %28, i64 4
+  %31 = bitcast i8* %30 to <4 x i8>*
+  %wide.load71.1 = load <4 x i8>, <4 x i8>* %31, align 1, !tbaa !39
+  %32 = uitofp <4 x i8> %wide.load.1 to <4 x float>
+  %33 = uitofp <4 x i8> %wide.load71.1 to <4 x float>
+  %34 = fmul fast <4 x float> %32, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %35 = fmul fast <4 x float> %33, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  %36 = getelementptr inbounds float, float* %0, i64 %index.next
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> %34, <4 x float>* %37, align 4, !tbaa !20
+  %38 = getelementptr float, float* %36, i64 4
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> %35, <4 x float>* %39, align 4, !tbaa !20
+  %index.next.1 = add i64 %index, 16
+  %40 = icmp eq i64 %index.next.1, %n.vec
+  br i1 %40, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !40
+
+middle.block.unr-lcssa:                           ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.prol.loopexit
+  %cmp.n = icmp eq i64 %umax, %n.vec
+  br i1 %cmp.n, label %for.cond.cleanup.loopexit, label %for.body.preheader72
+
+for.cond.cleanup.loopexit.loopexit:               ; preds = %for.body
+  br label %for.cond.cleanup.loopexit
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup.loopexit.loopexit, %middle.block
+  ; Byte offset 40 of the float buffer is element index 10.
+  %arrayidx22.phi.trans.insert = getelementptr inbounds i8, i8* %call10, i64 40
+  %.phi.trans.insert = bitcast i8* %arrayidx22.phi.trans.insert to float*
+  %.pre = load float, float* %.phi.trans.insert, align 4, !tbaa !20
+  %phitmp = fpext float %.pre to double
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %if.end
+  ; The value printed is undef when the conversion loop was skipped (num_elems == 0).
+  %41 = phi double [ %phitmp, %for.cond.cleanup.loopexit ], [ undef, %if.end ]
+  %call24 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.17, i64 0, i64 0), i32 10, double %41)
+  %conv25 = sext i32 %dim1_size to i64
+  %conv26 = sext i32 %dim2_size to i64
+  %conv27 = sext i32 %dim3_size to i64
+  %conv28 = sext i32 %dim4_size to i64
+  %call29 = tail call i8* @create4DTensor(i32 %data_type, i32 0, i64 %conv25, i64 %conv26, i64 %conv27, i64 %conv28) #2
+  %conv30 = sext i32 %mul6 to i64
+  tail call void @initTensorData(i8* %call29, i8* %call10, i64 %conv30) #2
+  tail call void @hpvm_request_tensor(i8* %call29, i32 0) #2
+  %host_data.i = getelementptr inbounds i8, i8* %call29, i64 32
+  %42 = bitcast i8* %host_data.i to float**
+  %43 = load float*, float** %42, align 8, !tbaa !18
+  br i1 %cmp1966, label %_Z13compareValuesPvPfm.exit, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %for.cond.cleanup
+  br label %for.body.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  %conv.i = zext i32 %inc.i to i64
+  %cmp.i = icmp ult i64 %conv.i, %conv
+  br i1 %cmp.i, label %for.body.i, label %_Z13compareValuesPvPfm.exit.loopexit
+
+for.body.i:                                       ; preds = %for.cond.i, %for.body.i.preheader
+  ; Verification loop: tensor contents (%43) must equal the float buffer (%0).
+  %conv13.i = phi i64 [ %conv.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %i.012.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %arrayidx.i = getelementptr inbounds float, float* %43, i64 %conv13.i
+  %44 = load float, float* %arrayidx.i, align 4, !tbaa !20
+  %arrayidx2.i = getelementptr inbounds float, float* %0, i64 %conv13.i
+  %45 = load float, float* %arrayidx2.i, align 4, !tbaa !20
+  %cmp3.i = fcmp fast une float %44, %45
+  %inc.i = add i32 %i.012.i, 1
+  br i1 %cmp3.i, label %if.then.i, label %for.cond.i
+
+if.then.i:                                        ; preds = %for.body.i
+  %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0)) #2
+  tail call void @abort() #8
+  unreachable
+
+_Z13compareValuesPvPfm.exit.loopexit:             ; preds = %for.cond.i
+  br label %_Z13compareValuesPvPfm.exit
+
+_Z13compareValuesPvPfm.exit:                      ; preds = %_Z13compareValuesPvPfm.exit.loopexit, %for.cond.cleanup
+  ret i8* %call29
+
+for.body:                                         ; preds = %for.body, %for.body.preheader72
+  ; Scalar byte -> scaled float conversion (remainder / non-vectorized path).
+  %i.067 = phi i64 [ %inc, %for.body ], [ %i.067.ph, %for.body.preheader72 ]
+  %arrayidx = getelementptr inbounds i8, i8* %call, i64 %i.067
+  %46 = load i8, i8* %arrayidx, align 1, !tbaa !39
+  %conv20 = uitofp i8 %46 to float
+  %div = fmul fast float %conv20, 0x3F70101020000000
+  %arrayidx21 = getelementptr inbounds float, float* %0, i64 %i.067
+  store float %div, float* %arrayidx21, align 4, !tbaa !20
+  %inc = add nuw i64 %i.067, 1
+  %cmp19 = icmp ult i64 %inc, %conv
+  br i1 %cmp19, label %for.body, label %for.cond.cleanup.loopexit.loopexit, !llvm.loop !41
+}
+
+; External declarations: these functions have no body here and must be
+; provided at link time.
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i32 @fseek(%struct._IO_FILE* nocapture, i64, i32) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+; Called by the file readers as (data_type, 0, dim1, dim2, dim3, dim4); the
+; returned i8* is used as the tensor handle. Presumably implemented by the
+; tensor runtime -- confirm.
+declare i8* @create4DTensor(i32, i32, i64, i64, i64, i64) local_unnamed_addr #0
+
+; Called by the file readers as (tensor, source buffer, size in bytes).
+declare void @initTensorData(i8*, i8*, i64) local_unnamed_addr #0
+
+; readTrainedWeights(const char* file_name, int data_type, int dim1_size,
+;                    int dim2_size, int dim3_size, int dim4_size)
+;   [mangled: _Z18readTrainedWeightsPKciiiii]
+;
+; Steps, in execution order:
+;   1. num_elems = dim1*dim2*dim3*dim4 (%mul2, i32); malloc 4*num_elems bytes
+;      (%call), with the size computed in i64.
+;   2. fopen(file_name, @.str.15); on a null result, printf(@.str.16,
+;      file_name) and abort().
+;   3. size_in_bytes = 4*dim1*dim2*dim3*dim4 (%mul6, computed in i32).
+;      fseek(file, 0, 1), then fread size_in_bytes single bytes into %call.
+;   4. create4DTensor(data_type, 0, dim1, dim2, dim3, dim4),
+;      initTensorData(tensor, %call, size_in_bytes),
+;      hpvm_request_tensor(tensor, 0).
+;   5. Compare the tensor's float array (offset 32) against %call for
+;      num_elems elements; printf(@.str.14) and abort() on the first mismatch.
+;      The `.i` suffixes and `_Z13compareValuesPvPfm.exit` labels suggest this
+;      is an inlined copy of compareValues.
+;   6. Return the tensor handle cast to %struct.Tensor*.
+;
+; NOTE(review), all visible in this function body:
+;   - the malloc result is not checked for null and the fread result is unused;
+;   - the FILE* from fopen is never closed;
+;   - the allocation size (%mul7) is computed in i64 while the byte count
+;     passed to fread/initTensorData (%mul6) is computed in i32, so the two
+;     can disagree if the i32 product overflows.
+; Function Attrs: nounwind uwtable
+define %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %file_name, i32 %data_type, i32 %dim1_size, i32 %dim2_size, i32 %dim3_size, i32 %dim4_size) local_unnamed_addr #3 {
+entry:
+  %mul = mul nsw i32 %dim2_size, %dim1_size
+  %mul1 = mul nsw i32 %mul, %dim3_size
+  %mul2 = mul nsw i32 %mul1, %dim4_size
+  %conv = sext i32 %mul2 to i64
+  %mul7 = shl nsw i64 %conv, 2
+  %call = tail call noalias i8* @malloc(i64 %mul7) #2
+  %call8 = tail call %struct._IO_FILE* @fopen(i8* %file_name, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0))
+  %cmp = icmp eq %struct._IO_FILE* %call8, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call9 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.16, i64 0, i64 0), i8* %file_name)
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %0 = bitcast i8* %call to float*
+  %mul3 = shl i32 %dim1_size, 2
+  %mul4 = mul nsw i32 %mul3, %dim2_size
+  %mul5 = mul nsw i32 %mul4, %dim3_size
+  %mul6 = mul nsw i32 %mul5, %dim4_size
+  ; Offset 0 with whence 1: the file position is left where it is.
+  %call11 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call8, i64 0, i32 1)
+  %conv12 = sext i32 %mul6 to i64
+  %call13 = tail call i64 @fread(i8* %call, i64 1, i64 %conv12, %struct._IO_FILE* nonnull %call8)
+  %conv14 = sext i32 %dim1_size to i64
+  %conv15 = sext i32 %dim2_size to i64
+  %conv16 = sext i32 %dim3_size to i64
+  %conv17 = sext i32 %dim4_size to i64
+  %call18 = tail call i8* @create4DTensor(i32 %data_type, i32 0, i64 %conv14, i64 %conv15, i64 %conv16, i64 %conv17) #2
+  %1 = bitcast i8* %call18 to %struct.Tensor*
+  tail call void @initTensorData(i8* %call18, i8* %call, i64 %conv12) #2
+  tail call void @hpvm_request_tensor(i8* %call18, i32 0) #2
+  %host_data.i = getelementptr inbounds i8, i8* %call18, i64 32
+  %2 = bitcast i8* %host_data.i to float**
+  %3 = load float*, float** %2, align 8, !tbaa !18
+  %cmp11.i = icmp eq i32 %mul2, 0
+  br i1 %cmp11.i, label %_Z13compareValuesPvPfm.exit, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %if.end
+  br label %for.body.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  %conv.i = zext i32 %inc.i to i64
+  %cmp.i = icmp ult i64 %conv.i, %conv
+  br i1 %cmp.i, label %for.body.i, label %_Z13compareValuesPvPfm.exit.loopexit
+
+for.body.i:                                       ; preds = %for.cond.i, %for.body.i.preheader
+  ; Verification loop: tensor contents (%3) must equal the bytes read from the file (%0).
+  %conv13.i = phi i64 [ %conv.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %i.012.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %for.body.i.preheader ]
+  %arrayidx.i = getelementptr inbounds float, float* %3, i64 %conv13.i
+  %4 = load float, float* %arrayidx.i, align 4, !tbaa !20
+  %arrayidx2.i = getelementptr inbounds float, float* %0, i64 %conv13.i
+  %5 = load float, float* %arrayidx2.i, align 4, !tbaa !20
+  %cmp3.i = fcmp fast une float %4, %5
+  %inc.i = add i32 %i.012.i, 1
+  br i1 %cmp3.i, label %if.then.i, label %for.cond.i
+
+if.then.i:                                        ; preds = %for.body.i
+  %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.14, i64 0, i64 0)) #2
+  tail call void @abort() #8
+  unreachable
+
+_Z13compareValuesPvPfm.exit.loopexit:             ; preds = %for.cond.i
+  br label %_Z13compareValuesPvPfm.exit
+
+_Z13compareValuesPvPfm.exit:                      ; preds = %_Z13compareValuesPvPfm.exit.loopexit, %if.end
+  ret %struct.Tensor* %1
+}
+
+; readLabels(char* labels_file, int num_labels)   [mangled: _Z10readLabelsPci]
+;
+; Allocates num_labels bytes with malloc, opens labels_file with
+; fopen(labels_file, @.str.15), calls fseek(file, 8, 1), and reads num_labels
+; single bytes into the buffer. The fread return value is printed with
+; printf(@.str.19, ...) and the buffer pointer is returned.
+; If fopen returns null, prints @.str.18 with the file name and calls abort().
+;
+; NOTE(review), visible in this function body: the malloc result is not
+; checked for null and the FILE* is never closed. The returned buffer is not
+; freed here, so releasing it falls to the caller.
+; Function Attrs: nounwind uwtable
+define noalias i8* @_Z10readLabelsPci(i8* %labels_file, i32 %num_labels) local_unnamed_addr #3 {
+entry:
+  %conv = sext i32 %num_labels to i64
+  %call = tail call noalias i8* @malloc(i64 %conv) #2
+  %call1 = tail call %struct._IO_FILE* @fopen(i8* %labels_file, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.15, i64 0, i64 0))
+  %cmp = icmp eq %struct._IO_FILE* %call1, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.18, i64 0, i64 0), i8* %labels_file)
+  tail call void @abort() #8
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ; Skip 8 bytes from the current position before reading the label bytes.
+  %call4 = tail call i32 @fseek(%struct._IO_FILE* nonnull %call1, i64 8, i32 1)
+  %call7 = tail call i64 @fread(i8* %call, i64 1, i64 %conv, %struct._IO_FILE* nonnull %call1)
+  %call8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.19, i64 0, i64 0), i64 %call7)
+  ret i8* %call
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z15computeAccuracyPciPv(i8* %labels_file, i32 %num_labels, i8* nocapture readonly %result_ptr) local_unnamed_addr #3 {
+entry:
+  %ss = alloca %"class.std::__cxx11::basic_ostringstream", align 16
+  %print_str = alloca %"class.std::__cxx11::basic_string", align 8
+  %call = tail call i8* @_Z10readLabelsPci(i8* %labels_file, i32 %num_labels)
+  %dim_sizes = getelementptr inbounds i8, i8* %result_ptr, i64 72
+  %0 = bitcast i8* %dim_sizes to i64**
+  %1 = load i64*, i64** %0, align 8, !tbaa !14
+  %2 = load i64, i64* %1, align 8, !tbaa !15
+  %arrayidx3 = getelementptr inbounds i64, i64* %1, i64 1
+  %3 = load i64, i64* %arrayidx3, align 8, !tbaa !15
+  %host_data = getelementptr inbounds i8, i8* %result_ptr, i64 32
+  %4 = bitcast i8* %host_data to float**
+  %5 = load float*, float** %4, align 8, !tbaa !18
+  %cmp87 = icmp eq i64 %2, 0
+  br i1 %cmp87, label %for.cond.cleanup, label %for.cond4.preheader.preheader
+
+for.cond4.preheader.preheader:                    ; preds = %entry
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader, %for.cond4.preheader.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %num_errors.089 = phi i32 [ %num_errors.0.inc21, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %mul = mul i64 %indvars.iv, %3
+  %arrayidx10 = getelementptr inbounds float, float* %5, i64 %mul
+  %6 = load float, float* %arrayidx10, align 4, !tbaa !20
+  %add14 = add i64 %mul, 1
+  %arrayidx15 = getelementptr inbounds float, float* %5, i64 %add14
+  %7 = load float, float* %arrayidx15, align 4, !tbaa !20
+  %cmp16 = fcmp fast olt float %6, %7
+  %chosen.1 = zext i1 %cmp16 to i32
+  %conv9.1 = zext i1 %cmp16 to i64
+  %add.1 = add i64 %conv9.1, %mul
+  %arrayidx10.1 = getelementptr inbounds float, float* %5, i64 %add.1
+  %8 = load float, float* %arrayidx10.1, align 4, !tbaa !20
+  %add14.1 = add i64 %mul, 2
+  %arrayidx15.1 = getelementptr inbounds float, float* %5, i64 %add14.1
+  %9 = load float, float* %arrayidx15.1, align 4, !tbaa !20
+  %cmp16.1 = fcmp fast olt float %8, %9
+  %chosen.1.1 = select i1 %cmp16.1, i32 2, i32 %chosen.1
+  %conv9.291 = zext i32 %chosen.1.1 to i64
+  %add.2 = add i64 %conv9.291, %mul
+  %arrayidx10.2 = getelementptr inbounds float, float* %5, i64 %add.2
+  %10 = load float, float* %arrayidx10.2, align 4, !tbaa !20
+  %add14.2 = add i64 %mul, 3
+  %arrayidx15.2 = getelementptr inbounds float, float* %5, i64 %add14.2
+  %11 = load float, float* %arrayidx15.2, align 4, !tbaa !20
+  %cmp16.2 = fcmp fast olt float %10, %11
+  %chosen.1.2 = select i1 %cmp16.2, i32 3, i32 %chosen.1.1
+  %conv9.392 = zext i32 %chosen.1.2 to i64
+  %add.3 = add i64 %conv9.392, %mul
+  %arrayidx10.3 = getelementptr inbounds float, float* %5, i64 %add.3
+  %12 = load float, float* %arrayidx10.3, align 4, !tbaa !20
+  %add14.3 = add i64 %mul, 4
+  %arrayidx15.3 = getelementptr inbounds float, float* %5, i64 %add14.3
+  %13 = load float, float* %arrayidx15.3, align 4, !tbaa !20
+  %cmp16.3 = fcmp fast olt float %12, %13
+  %chosen.1.3 = select i1 %cmp16.3, i32 4, i32 %chosen.1.2
+  %conv9.493 = zext i32 %chosen.1.3 to i64
+  %add.4 = add i64 %conv9.493, %mul
+  %arrayidx10.4 = getelementptr inbounds float, float* %5, i64 %add.4
+  %14 = load float, float* %arrayidx10.4, align 4, !tbaa !20
+  %add14.4 = add i64 %mul, 5
+  %arrayidx15.4 = getelementptr inbounds float, float* %5, i64 %add14.4
+  %15 = load float, float* %arrayidx15.4, align 4, !tbaa !20
+  %cmp16.4 = fcmp fast olt float %14, %15
+  %chosen.1.4 = select i1 %cmp16.4, i32 5, i32 %chosen.1.3
+  %conv9.594 = zext i32 %chosen.1.4 to i64
+  %add.5 = add i64 %conv9.594, %mul
+  %arrayidx10.5 = getelementptr inbounds float, float* %5, i64 %add.5
+  %16 = load float, float* %arrayidx10.5, align 4, !tbaa !20
+  %add14.5 = add i64 %mul, 6
+  %arrayidx15.5 = getelementptr inbounds float, float* %5, i64 %add14.5
+  %17 = load float, float* %arrayidx15.5, align 4, !tbaa !20
+  %cmp16.5 = fcmp fast olt float %16, %17
+  %chosen.1.5 = select i1 %cmp16.5, i32 6, i32 %chosen.1.4
+  %18 = zext i32 %chosen.1.5 to i64
+  %add.6 = add i64 %18, %mul
+  %arrayidx10.6 = getelementptr inbounds float, float* %5, i64 %add.6
+  %19 = load float, float* %arrayidx10.6, align 4, !tbaa !20
+  %add14.6 = add i64 %mul, 7
+  %arrayidx15.6 = getelementptr inbounds float, float* %5, i64 %add14.6
+  %20 = load float, float* %arrayidx15.6, align 4, !tbaa !20
+  %cmp16.6 = fcmp fast olt float %19, %20
+  %chosen.1.6 = select i1 %cmp16.6, i32 7, i32 %chosen.1.5
+  %conv9.7 = sext i32 %chosen.1.6 to i64
+  %add.7 = add i64 %conv9.7, %mul
+  %arrayidx10.7 = getelementptr inbounds float, float* %5, i64 %add.7
+  %21 = load float, float* %arrayidx10.7, align 4, !tbaa !20
+  %add14.7 = add i64 %mul, 8
+  %arrayidx15.7 = getelementptr inbounds float, float* %5, i64 %add14.7
+  %22 = load float, float* %arrayidx15.7, align 4, !tbaa !20
+  %cmp16.7 = fcmp fast olt float %21, %22
+  %chosen.1.7 = select i1 %cmp16.7, i32 8, i32 %chosen.1.6
+  %conv9.8 = sext i32 %chosen.1.7 to i64
+  %add.8 = add i64 %conv9.8, %mul
+  %arrayidx10.8 = getelementptr inbounds float, float* %5, i64 %add.8
+  %23 = load float, float* %arrayidx10.8, align 4, !tbaa !20
+  %add14.8 = add i64 %mul, 9
+  %arrayidx15.8 = getelementptr inbounds float, float* %5, i64 %add14.8
+  %24 = load float, float* %arrayidx15.8, align 4, !tbaa !20
+  %cmp16.8 = fcmp fast olt float %23, %24
+  %chosen.1.8 = select i1 %cmp16.8, i32 9, i32 %chosen.1.7
+  %arrayidx17 = getelementptr inbounds i8, i8* %call, i64 %indvars.iv
+  %25 = load i8, i8* %arrayidx17, align 1, !tbaa !39
+  %conv18 = zext i8 %25 to i32
+  %not.cmp19 = icmp ne i32 %chosen.1.8, %conv18
+  %inc21 = zext i1 %not.cmp19 to i32
+  %num_errors.0.inc21 = add nsw i32 %inc21, %num_errors.089
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %2
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.cond4.preheader
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond4.preheader
+  %phitmp = sext i32 %num_errors.0.inc21 to i64
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %num_errors.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  %sub = sub i64 %2, %num_errors.0.lcssa
+  %conv27 = uitofp i64 %sub to double
+  %conv29 = uitofp i64 %2 to double
+  %div = fdiv fast double %conv27, %conv29
+  %mul31 = fmul fast double %div, 1.000000e+02
+  %conv32 = fptrunc double %mul31 to float
+  %conv33 = fpext float %conv32 to double
+  %call34 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.20, i64 0, i64 0), double %conv33)
+  %call35 = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.21, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.22, i64 0, i64 0))
+  %cmp36 = icmp eq %struct._IO_FILE* %call35, null
+  br i1 %cmp36, label %if.end44, label %if.then37
+
+if.then37:                                        ; preds = %for.cond.cleanup
+  %26 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8*
+  call void @llvm.lifetime.start(i64 376, i8* nonnull %26) #2
+  %27 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2
+  %28 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0
+  call void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"* %28) #2
+  %29 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !43
+  %_M_tie.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 1
+  store %"class.std::basic_ostream"* null, %"class.std::basic_ostream"** %_M_tie.i.i, align 8, !tbaa !45
+  %_M_fill.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 2
+  store i8 0, i8* %_M_fill.i.i, align 16, !tbaa !48
+  %_M_fill_init.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 3
+  store i8 0, i8* %_M_fill_init.i.i, align 1, !tbaa !49
+  %_M_streambuf.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 4
+  %30 = bitcast %"class.std::basic_streambuf"** %_M_streambuf.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %30, i8 0, i64 32, i32 8, i1 false) #2
+  %31 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 1) to i64*), align 8
+  %32 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i64*
+  store i64 %31, i64* %32, align 16, !tbaa !43
+  %33 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 2) to i64*), align 8
+  %34 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8**
+  %vtable.cast.i.i = inttoptr i64 %31 to i8*
+  %vbase.offset.ptr.i.i = getelementptr i8, i8* %vtable.cast.i.i, i64 -24
+  %35 = bitcast i8* %vbase.offset.ptr.i.i to i64*
+  %vbase.offset.i.i = load i64, i64* %35, align 8
+  %add.ptr.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i
+  %36 = bitcast i8* %add.ptr.i.i to i64*
+  store i64 %33, i64* %36, align 8, !tbaa !43
+  %vtable3.i.i = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr4.i.i = getelementptr i8, i8* %vtable3.i.i, i64 -24
+  %37 = bitcast i8* %vbase.offset.ptr4.i.i to i64*
+  %vbase.offset5.i.i = load i64, i64* %37, align 8
+  %add.ptr6.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset5.i.i
+  %38 = bitcast i8* %add.ptr6.i.i to %"class.std::basic_ios"*
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %38, %"class.std::basic_streambuf"* null) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !43
+  %_M_stringbuf.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1
+  %39 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0, i32 0
+  %40 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to <2 x i32 (...)**>*
+  store <2 x i32 (...)**> <i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**)>, <2 x i32 (...)**>* %40, align 16, !tbaa !43
+  %_M_in_beg.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 1
+  %_M_buf_locale.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 7
+  %41 = bitcast i8** %_M_in_beg.i.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %41, i8 0, i64 48, i32 8, i1 false) #2
+  call void @_ZNSt6localeC1Ev(%"class.std::locale"* %_M_buf_locale.i.i.i) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %39, align 8, !tbaa !43
+  %_M_mode.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 1
+  store i32 16, i32* %_M_mode.i.i, align 8, !tbaa !50
+  %_M_string.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2
+  %42 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 2
+  %43 = bitcast %"class.std::__cxx11::basic_string"* %_M_string.i.i to %union.anon**
+  store %union.anon* %42, %union.anon** %43, align 8, !tbaa !55
+  %_M_string_length.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 1
+  store i64 0, i64* %_M_string_length.i.i.i.i.i, align 8, !tbaa !56
+  %.cast.i.i.i = bitcast %union.anon* %42 to i8*
+  store i8 0, i8* %.cast.i.i.i, align 8, !tbaa !39
+  %vtable.i = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr.i = getelementptr i8, i8* %vtable.i, i64 -24
+  %44 = bitcast i8* %vbase.offset.ptr.i to i64*
+  %vbase.offset.i = load i64, i64* %44, align 8
+  %add.ptr2.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i
+  %45 = bitcast i8* %add.ptr2.i to %"class.std::basic_ios"*
+  %46 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %45, %"class.std::basic_streambuf"* %46) #2
+  %47 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to %"class.std::basic_ostream"*
+  %vtable.i74 = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr.i75 = getelementptr i8, i8* %vtable.i74, i64 -24
+  %48 = bitcast i8* %vbase.offset.ptr.i75 to i64*
+  %vbase.offset.i76 = load i64, i64* %48, align 8
+  %add.ptr.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i76
+  %_M_flags.i = getelementptr inbounds i8, i8* %add.ptr.i, i64 24
+  %49 = bitcast i8* %_M_flags.i to i32*
+  %50 = load i32, i32* %49, align 4, !tbaa !57
+  %and.i = and i32 %50, -261
+  %or.i = or i32 %and.i, 4
+  store i32 %or.i, i32* %49, align 4, !tbaa !57
+  %call.i = call dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"* nonnull %47, double %conv33) #2
+  %51 = bitcast %"class.std::__cxx11::basic_string"* %print_str to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %51) #2
+  call void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* nonnull sret %print_str, %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i) #2
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 0, i32 0
+  %52 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %_M_string_length.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 1
+  %53 = load i64, i64* %_M_string_length.i, align 8, !tbaa !56
+  %call42 = call i64 @fwrite(i8* %52, i64 1, i64 %53, %struct._IO_FILE* nonnull %call35)
+  %call43 = call i32 @fclose(%struct._IO_FILE* nonnull %call35)
+  %54 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %55 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 2
+  %arraydecay.i.i.i.i = bitcast %union.anon* %55 to i8*
+  %cmp.i.i.i = icmp eq i8* %54, %arraydecay.i.i.i.i
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %if.then37
+  call void @_ZdlPv(i8* %54) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %if.then.i.i, %if.then37
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %51) #2
+  %56 = load i64, i64* bitcast ([4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE to i64*), align 8
+  store i64 %56, i64* %32, align 16, !tbaa !43
+  %57 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 3) to i64*), align 8
+  %vtable.cast.i.i81 = inttoptr i64 %56 to i8*
+  %vbase.offset.ptr.i.i82 = getelementptr i8, i8* %vtable.cast.i.i81, i64 -24
+  %58 = bitcast i8* %vbase.offset.ptr.i.i82 to i64*
+  %vbase.offset.i.i83 = load i64, i64* %58, align 8
+  %add.ptr.i.i84 = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i83
+  %59 = bitcast i8* %add.ptr.i.i84 to i64*
+  store i64 %57, i64* %59, align 8, !tbaa !43
+  %60 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !43
+  %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 0, i32 0
+  %61 = load i8*, i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !59
+  %cmp.i.i.i.i.i.i = icmp eq i8* %61, %.cast.i.i.i
+  br i1 %cmp.i.i.i.i.i.i, label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i:                                ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  call void @_ZdlPv(i8* %61) #2
+  br label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+
+_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit: ; preds = %if.then.i.i.i.i.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !43
+  call void @_ZNSt6localeD1Ev(%"class.std::locale"* nonnull %_M_buf_locale.i.i.i) #2
+  %62 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 0
+  call void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"* %62) #2
+  call void @llvm.lifetime.end(i64 376, i8* nonnull %26) #2
+  br label %if.end44
+
+if.end44:                                         ; preds = %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, %for.cond.cleanup
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z16computeAccuracy2PhiPv(i8* nocapture readonly %labels, i32 %num_labels, i8* nocapture readonly %result_ptr) local_unnamed_addr #3 {
+entry:
+  %ss = alloca %"class.std::__cxx11::basic_ostringstream", align 16
+  %print_str = alloca %"class.std::__cxx11::basic_string", align 8
+  %dim_sizes = getelementptr inbounds i8, i8* %result_ptr, i64 72
+  %0 = bitcast i8* %dim_sizes to i64**
+  %1 = load i64*, i64** %0, align 8, !tbaa !14
+  %2 = load i64, i64* %1, align 8, !tbaa !15
+  %arrayidx3 = getelementptr inbounds i64, i64* %1, i64 1
+  %3 = load i64, i64* %arrayidx3, align 8, !tbaa !15
+  %host_data = getelementptr inbounds i8, i8* %result_ptr, i64 32
+  %4 = bitcast i8* %host_data to float**
+  %5 = load float*, float** %4, align 8, !tbaa !18
+  %cmp82 = icmp eq i64 %2, 0
+  br i1 %cmp82, label %for.cond.cleanup, label %for.cond4.preheader.preheader
+
+for.cond4.preheader.preheader:                    ; preds = %entry
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader, %for.cond4.preheader.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %num_errors.084 = phi i32 [ %num_errors.0.inc21, %for.cond4.preheader ], [ 0, %for.cond4.preheader.preheader ]
+  %mul = mul i64 %indvars.iv, %3
+  %arrayidx10 = getelementptr inbounds float, float* %5, i64 %mul
+  %6 = load float, float* %arrayidx10, align 4, !tbaa !20
+  %add14 = add i64 %mul, 1
+  %arrayidx15 = getelementptr inbounds float, float* %5, i64 %add14
+  %7 = load float, float* %arrayidx15, align 4, !tbaa !20
+  %cmp16 = fcmp fast olt float %6, %7
+  %chosen.1 = zext i1 %cmp16 to i32
+  %conv9.1 = zext i1 %cmp16 to i64
+  %add.1 = add i64 %conv9.1, %mul
+  %arrayidx10.1 = getelementptr inbounds float, float* %5, i64 %add.1
+  %8 = load float, float* %arrayidx10.1, align 4, !tbaa !20
+  %add14.1 = add i64 %mul, 2
+  %arrayidx15.1 = getelementptr inbounds float, float* %5, i64 %add14.1
+  %9 = load float, float* %arrayidx15.1, align 4, !tbaa !20
+  %cmp16.1 = fcmp fast olt float %8, %9
+  %chosen.1.1 = select i1 %cmp16.1, i32 2, i32 %chosen.1
+  %conv9.286 = zext i32 %chosen.1.1 to i64
+  %add.2 = add i64 %conv9.286, %mul
+  %arrayidx10.2 = getelementptr inbounds float, float* %5, i64 %add.2
+  %10 = load float, float* %arrayidx10.2, align 4, !tbaa !20
+  %add14.2 = add i64 %mul, 3
+  %arrayidx15.2 = getelementptr inbounds float, float* %5, i64 %add14.2
+  %11 = load float, float* %arrayidx15.2, align 4, !tbaa !20
+  %cmp16.2 = fcmp fast olt float %10, %11
+  %chosen.1.2 = select i1 %cmp16.2, i32 3, i32 %chosen.1.1
+  %conv9.387 = zext i32 %chosen.1.2 to i64
+  %add.3 = add i64 %conv9.387, %mul
+  %arrayidx10.3 = getelementptr inbounds float, float* %5, i64 %add.3
+  %12 = load float, float* %arrayidx10.3, align 4, !tbaa !20
+  %add14.3 = add i64 %mul, 4
+  %arrayidx15.3 = getelementptr inbounds float, float* %5, i64 %add14.3
+  %13 = load float, float* %arrayidx15.3, align 4, !tbaa !20
+  %cmp16.3 = fcmp fast olt float %12, %13
+  %chosen.1.3 = select i1 %cmp16.3, i32 4, i32 %chosen.1.2
+  %conv9.488 = zext i32 %chosen.1.3 to i64
+  %add.4 = add i64 %conv9.488, %mul
+  %arrayidx10.4 = getelementptr inbounds float, float* %5, i64 %add.4
+  %14 = load float, float* %arrayidx10.4, align 4, !tbaa !20
+  %add14.4 = add i64 %mul, 5
+  %arrayidx15.4 = getelementptr inbounds float, float* %5, i64 %add14.4
+  %15 = load float, float* %arrayidx15.4, align 4, !tbaa !20
+  %cmp16.4 = fcmp fast olt float %14, %15
+  %chosen.1.4 = select i1 %cmp16.4, i32 5, i32 %chosen.1.3
+  %conv9.589 = zext i32 %chosen.1.4 to i64
+  %add.5 = add i64 %conv9.589, %mul
+  %arrayidx10.5 = getelementptr inbounds float, float* %5, i64 %add.5
+  %16 = load float, float* %arrayidx10.5, align 4, !tbaa !20
+  %add14.5 = add i64 %mul, 6
+  %arrayidx15.5 = getelementptr inbounds float, float* %5, i64 %add14.5
+  %17 = load float, float* %arrayidx15.5, align 4, !tbaa !20
+  %cmp16.5 = fcmp fast olt float %16, %17
+  %chosen.1.5 = select i1 %cmp16.5, i32 6, i32 %chosen.1.4
+  %18 = zext i32 %chosen.1.5 to i64
+  %add.6 = add i64 %18, %mul
+  %arrayidx10.6 = getelementptr inbounds float, float* %5, i64 %add.6
+  %19 = load float, float* %arrayidx10.6, align 4, !tbaa !20
+  %add14.6 = add i64 %mul, 7
+  %arrayidx15.6 = getelementptr inbounds float, float* %5, i64 %add14.6
+  %20 = load float, float* %arrayidx15.6, align 4, !tbaa !20
+  %cmp16.6 = fcmp fast olt float %19, %20
+  %chosen.1.6 = select i1 %cmp16.6, i32 7, i32 %chosen.1.5
+  %conv9.7 = sext i32 %chosen.1.6 to i64
+  %add.7 = add i64 %conv9.7, %mul
+  %arrayidx10.7 = getelementptr inbounds float, float* %5, i64 %add.7
+  %21 = load float, float* %arrayidx10.7, align 4, !tbaa !20
+  %add14.7 = add i64 %mul, 8
+  %arrayidx15.7 = getelementptr inbounds float, float* %5, i64 %add14.7
+  %22 = load float, float* %arrayidx15.7, align 4, !tbaa !20
+  %cmp16.7 = fcmp fast olt float %21, %22
+  %chosen.1.7 = select i1 %cmp16.7, i32 8, i32 %chosen.1.6
+  %conv9.8 = sext i32 %chosen.1.7 to i64
+  %add.8 = add i64 %conv9.8, %mul
+  %arrayidx10.8 = getelementptr inbounds float, float* %5, i64 %add.8
+  %23 = load float, float* %arrayidx10.8, align 4, !tbaa !20
+  %add14.8 = add i64 %mul, 9
+  %arrayidx15.8 = getelementptr inbounds float, float* %5, i64 %add14.8
+  %24 = load float, float* %arrayidx15.8, align 4, !tbaa !20
+  %cmp16.8 = fcmp fast olt float %23, %24
+  %chosen.1.8 = select i1 %cmp16.8, i32 9, i32 %chosen.1.7
+  %arrayidx17 = getelementptr inbounds i8, i8* %labels, i64 %indvars.iv
+  %25 = load i8, i8* %arrayidx17, align 1, !tbaa !39
+  %conv18 = zext i8 %25 to i32
+  %not.cmp19 = icmp ne i32 %chosen.1.8, %conv18
+  %inc21 = zext i1 %not.cmp19 to i32
+  %num_errors.0.inc21 = add nsw i32 %inc21, %num_errors.084
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %2
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.cond4.preheader
+
+for.cond.cleanup.loopexit:                        ; preds = %for.cond4.preheader
+  %phitmp = sext i32 %num_errors.0.inc21 to i64
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %num_errors.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+  %sub = sub i64 %2, %num_errors.0.lcssa
+  %conv27 = uitofp i64 %sub to double
+  %conv29 = uitofp i64 %2 to double
+  %div = fdiv fast double %conv27, %conv29
+  %mul31 = fmul fast double %div, 1.000000e+02
+  %conv32 = fptrunc double %mul31 to float
+  %conv33 = fpext float %conv32 to double
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.20, i64 0, i64 0), double %conv33)
+  %call34 = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.21, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.22, i64 0, i64 0))
+  %cmp35 = icmp eq %struct._IO_FILE* %call34, null
+  br i1 %cmp35, label %if.end43, label %if.then36
+
+if.then36:                                        ; preds = %for.cond.cleanup
+  %26 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8*
+  call void @llvm.lifetime.start(i64 376, i8* nonnull %26) #2
+  %27 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2
+  %28 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0
+  call void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"* %28) #2
+  %29 = getelementptr inbounds %"class.std::basic_ios", %"class.std::basic_ios"* %27, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !43
+  %_M_tie.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 1
+  store %"class.std::basic_ostream"* null, %"class.std::basic_ostream"** %_M_tie.i.i, align 8, !tbaa !45
+  %_M_fill.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 2
+  store i8 0, i8* %_M_fill.i.i, align 16, !tbaa !48
+  %_M_fill_init.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 3
+  store i8 0, i8* %_M_fill_init.i.i, align 1, !tbaa !49
+  %_M_streambuf.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 4
+  %30 = bitcast %"class.std::basic_streambuf"** %_M_streambuf.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %30, i8 0, i64 32, i32 8, i1 false) #2
+  %31 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 1) to i64*), align 8
+  %32 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i64*
+  store i64 %31, i64* %32, align 16, !tbaa !43
+  %33 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 2) to i64*), align 8
+  %34 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to i8**
+  %vtable.cast.i.i = inttoptr i64 %31 to i8*
+  %vbase.offset.ptr.i.i = getelementptr i8, i8* %vtable.cast.i.i, i64 -24
+  %35 = bitcast i8* %vbase.offset.ptr.i.i to i64*
+  %vbase.offset.i.i = load i64, i64* %35, align 8
+  %add.ptr.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i
+  %36 = bitcast i8* %add.ptr.i.i to i64*
+  store i64 %33, i64* %36, align 8, !tbaa !43
+  %vtable3.i.i = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr4.i.i = getelementptr i8, i8* %vtable3.i.i, i64 -24
+  %37 = bitcast i8* %vbase.offset.ptr4.i.i to i64*
+  %vbase.offset5.i.i = load i64, i64* %37, align 8
+  %add.ptr6.i.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset5.i.i
+  %38 = bitcast i8* %add.ptr6.i.i to %"class.std::basic_ios"*
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %38, %"class.std::basic_streambuf"* null) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** %29, align 16, !tbaa !43
+  %_M_stringbuf.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1
+  %39 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0, i32 0
+  %40 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to <2 x i32 (...)**>*
+  store <2 x i32 (...)**> <i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*], [5 x i8*] }, { [5 x i8*], [5 x i8*] }* @_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**)>, <2 x i32 (...)**>* %40, align 16, !tbaa !43
+  %_M_in_beg.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 1
+  %_M_buf_locale.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 7
+  %41 = bitcast i8** %_M_in_beg.i.i.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %41, i8 0, i64 48, i32 8, i1 false) #2
+  call void @_ZNSt6localeC1Ev(%"class.std::locale"* %_M_buf_locale.i.i.i) #2
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %39, align 8, !tbaa !43
+  %_M_mode.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 1
+  store i32 16, i32* %_M_mode.i.i, align 8, !tbaa !50
+  %_M_string.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2
+  %42 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 2
+  %43 = bitcast %"class.std::__cxx11::basic_string"* %_M_string.i.i to %union.anon**
+  store %union.anon* %42, %union.anon** %43, align 8, !tbaa !55
+  %_M_string_length.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 1
+  store i64 0, i64* %_M_string_length.i.i.i.i.i, align 8, !tbaa !56
+  %.cast.i.i.i = bitcast %union.anon* %42 to i8*
+  store i8 0, i8* %.cast.i.i.i, align 8, !tbaa !39
+  %vtable.i = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr.i = getelementptr i8, i8* %vtable.i, i64 -24
+  %44 = bitcast i8* %vbase.offset.ptr.i to i64*
+  %vbase.offset.i = load i64, i64* %44, align 8
+  %add.ptr2.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i
+  %45 = bitcast i8* %add.ptr2.i to %"class.std::basic_ios"*
+  %46 = getelementptr inbounds %"class.std::__cxx11::basic_stringbuf", %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i, i64 0, i32 0
+  call void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"* %45, %"class.std::basic_streambuf"* %46) #2
+  %47 = bitcast %"class.std::__cxx11::basic_ostringstream"* %ss to %"class.std::basic_ostream"*
+  %vtable.i72 = load i8*, i8** %34, align 16, !tbaa !43
+  %vbase.offset.ptr.i73 = getelementptr i8, i8* %vtable.i72, i64 -24
+  %48 = bitcast i8* %vbase.offset.ptr.i73 to i64*
+  %vbase.offset.i74 = load i64, i64* %48, align 8
+  %add.ptr.i = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i74
+  %_M_flags.i.i = getelementptr inbounds i8, i8* %add.ptr.i, i64 24
+  %49 = bitcast i8* %_M_flags.i.i to i32*
+  %50 = load i32, i32* %49, align 8, !tbaa !60
+  %and.i.i.i.i = and i32 %50, -261
+  %or.i.i.i.i = or i32 %and.i.i.i.i, 4
+  store i32 %or.i.i.i.i, i32* %49, align 4, !tbaa !57
+  %call.i = call dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"* nonnull %47, double %conv33) #2
+  %51 = bitcast %"class.std::__cxx11::basic_string"* %print_str to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %51) #2
+  call void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* nonnull sret %print_str, %"class.std::__cxx11::basic_stringbuf"* %_M_stringbuf.i) #2
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 0, i32 0
+  %52 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %_M_string_length.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 1
+  %53 = load i64, i64* %_M_string_length.i, align 8, !tbaa !56
+  %call41 = call i64 @fwrite(i8* %52, i64 1, i64 %53, %struct._IO_FILE* nonnull %call34)
+  %call42 = call i32 @fclose(%struct._IO_FILE* nonnull %call34)
+  %54 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %55 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %print_str, i64 0, i32 2
+  %arraydecay.i.i.i.i = bitcast %union.anon* %55 to i8*
+  %cmp.i.i.i = icmp eq i8* %54, %arraydecay.i.i.i.i
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %if.then36
+  call void @_ZdlPv(i8* %54) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %if.then.i.i, %if.then36
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %51) #2
+  %56 = load i64, i64* bitcast ([4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE to i64*), align 8
+  store i64 %56, i64* %32, align 16, !tbaa !43
+  %57 = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE, i64 0, i64 3) to i64*), align 8
+  %vtable.cast.i.i76 = inttoptr i64 %56 to i8*
+  %vbase.offset.ptr.i.i77 = getelementptr i8, i8* %vtable.cast.i.i76, i64 -24
+  %58 = bitcast i8* %vbase.offset.ptr.i.i77 to i64*
+  %vbase.offset.i.i78 = load i64, i64* %58, align 8
+  %add.ptr.i.i79 = getelementptr inbounds i8, i8* %26, i64 %vbase.offset.i.i78
+  %59 = bitcast i8* %add.ptr.i.i79 to i64*
+  store i64 %57, i64* %59, align 8, !tbaa !43
+  %60 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !43
+  %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 1, i32 2, i32 0, i32 0
+  %61 = load i8*, i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !59
+  %cmp.i.i.i.i.i.i = icmp eq i8* %61, %.cast.i.i.i
+  br i1 %cmp.i.i.i.i.i.i, label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i:                                ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  call void @_ZdlPv(i8* %61) #2
+  br label %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit
+
+_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit: ; preds = %if.then.i.i.i.i.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [16 x i8*] }, { [16 x i8*] }* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %60, align 8, !tbaa !43
+  call void @_ZNSt6localeD1Ev(%"class.std::locale"* nonnull %_M_buf_locale.i.i.i) #2
+  %62 = getelementptr inbounds %"class.std::__cxx11::basic_ostringstream", %"class.std::__cxx11::basic_ostringstream"* %ss, i64 0, i32 2, i32 0
+  call void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"* %62) #2
+  call void @llvm.lifetime.end(i64 376, i8* nonnull %26) #2
+  br label %if.end43
+
+if.end43:                                         ; preds = %_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev.exit, %for.cond.cleanup
+  ret void
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main() local_unnamed_addr #6 {
+entry:
+  %__dnew.i.i.i.i207 = alloca i64, align 8
+  %__dnew.i.i.i.i166 = alloca i64, align 8
+  %__dnew.i.i.i.i125 = alloca i64, align 8
+  %__dnew.i.i.i.i84 = alloca i64, align 8
+  %__dnew.i.i.i.i55 = alloca i64, align 8
+  %__dnew.i.i.i.i = alloca i64, align 8
+  %prefix = alloca %"class.std::__cxx11::basic_string", align 8
+  %input_data_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp1 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv1_w_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp3 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv1_b_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp5 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv2_w_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp7 = alloca %"class.std::__cxx11::basic_string", align 8
+  %conv2_b_path = alloca %"class.std::__cxx11::basic_string", align 8
+  %ref.tmp9 = alloca %"class.std::__cxx11::basic_string", align 8
+  %0 = bitcast %"class.std::__cxx11::basic_string"* %prefix to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %0) #2
+  %1 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 2
+  %2 = bitcast %"class.std::__cxx11::basic_string"* %prefix to %union.anon**
+  store %union.anon* %1, %union.anon** %2, align 8, !tbaa !55
+  %3 = bitcast %union.anon* %1 to i8*
+  %4 = bitcast i64* %__dnew.i.i.i.i to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %4) #2
+  store i64 54, i64* %__dnew.i.i.i.i, align 8, !tbaa !15
+  %call5.i.i.i.i = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %prefix, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i, i64 0) #2
+  %_M_p.i13.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59
+  %5 = load i64, i64* %__dnew.i.i.i.i, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 2, i32 0
+  store i64 %5, i64* %_M_allocated_capacity.i.i.i.i.i, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i, i8* nonnull getelementptr inbounds ([55 x i8], [55 x i8]* @.str.23, i64 0, i64 0), i64 54, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %prefix, i64 0, i32 1
+  store i64 %5, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i8, i8* %call5.i.i.i.i, i64 %5
+  store i8 0, i8* %arrayidx.i.i.i.i.i, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %4) #2
+  %6 = bitcast %"class.std::__cxx11::basic_string"* %input_data_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %6) #2
+  %7 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp1 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %7) #2
+  %8 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 2
+  %9 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp1 to %union.anon**
+  store %union.anon* %8, %union.anon** %9, align 8, !tbaa !55
+  %10 = bitcast %union.anon* %8 to i8*
+  %11 = bitcast i64* %__dnew.i.i.i.i55 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %11) #2
+  store i64 34, i64* %__dnew.i.i.i.i55, align 8, !tbaa !15
+  %call5.i.i.i.i60 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp1, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i55, i64 0) #2
+  %_M_p.i13.i.i.i.i61 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i60, i8** %_M_p.i13.i.i.i.i61, align 8, !tbaa !59
+  %12 = load i64, i64* %__dnew.i.i.i.i55, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i62 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 2, i32 0
+  store i64 %12, i64* %_M_allocated_capacity.i.i.i.i.i62, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i60, i8* nonnull getelementptr inbounds ([35 x i8], [35 x i8]* @.str.24, i64 0, i64 0), i64 34, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i68 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp1, i64 0, i32 1
+  store i64 %12, i64* %_M_string_length.i.i.i.i.i.i68, align 8, !tbaa !56
+  %arrayidx.i.i.i.i.i69 = getelementptr inbounds i8, i8* %call5.i.i.i.i60, i64 %12
+  store i8 0, i8* %arrayidx.i.i.i.i.i69, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %11) #2
+  %13 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56, !noalias !65
+  %14 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59, !noalias !65
+  %call3.i.i.i = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp1, i64 0, i64 0, i8* %14, i64 %13) #2, !noalias !65
+  %15 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 2
+  %16 = bitcast %"class.std::__cxx11::basic_string"* %input_data_path to %union.anon**
+  store %union.anon* %15, %union.anon** %16, align 8, !tbaa !55, !alias.scope !65
+  %_M_p.i.i23.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 0, i32 0
+  %17 = load i8*, i8** %_M_p.i.i23.i.i, align 8, !tbaa !59
+  %18 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 2
+  %arraydecay.i.i.i.i71 = bitcast %union.anon* %18 to i8*
+  %cmp.i.i.i72 = icmp eq i8* %17, %arraydecay.i.i.i.i71
+  br i1 %cmp.i.i.i72, label %if.then.i.i73, label %if.else.i.i
+
+if.then.i.i73:                                    ; preds = %entry
+  %arraydecay.i.i.i = bitcast %union.anon* %15 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i, i8* %17, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+
+if.else.i.i:                                      ; preds = %entry
+  %_M_p.i21.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 0, i32 0
+  store i8* %17, i8** %_M_p.i21.i.i, align 8, !tbaa !59, !alias.scope !65
+  %_M_allocated_capacity.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 2, i32 0
+  %19 = load i64, i64* %_M_allocated_capacity.i.i, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 2, i32 0
+  store i64 %19, i64* %_M_allocated_capacity.i.i.i, align 8, !tbaa !15, !alias.scope !65
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit: ; preds = %if.else.i.i, %if.then.i.i73
+  %_M_string_length.i20.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i, i64 0, i32 1
+  %20 = load i64, i64* %_M_string_length.i20.i.i, align 8, !tbaa !56
+  %_M_string_length.i.i2.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 1
+  store i64 %20, i64* %_M_string_length.i.i2.i, align 8, !tbaa !56, !alias.scope !65
+  %21 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i to %union.anon**
+  store %union.anon* %18, %union.anon** %21, align 8, !tbaa !59
+  store i64 0, i64* %_M_string_length.i20.i.i, align 8, !tbaa !56
+  store i8 0, i8* %arraydecay.i.i.i.i71, align 1, !tbaa !39
+  %22 = load i8*, i8** %_M_p.i13.i.i.i.i61, align 8, !tbaa !59
+  %cmp.i.i.i76 = icmp eq i8* %22, %10
+  br i1 %cmp.i.i.i76, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78, label %if.then.i.i77
+
+if.then.i.i77:                                    ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+  call void @_ZdlPv(i8* %22) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78: ; preds = %if.then.i.i77, %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %7) #2
+  %23 = bitcast %"class.std::__cxx11::basic_string"* %conv1_w_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %23) #2
+  %24 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp3 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %24) #2
+  %25 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 2
+  %26 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp3 to %union.anon**
+  store %union.anon* %25, %union.anon** %26, align 8, !tbaa !55
+  %27 = bitcast %union.anon* %25 to i8*
+  %28 = bitcast i64* %__dnew.i.i.i.i84 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %28) #2
+  store i64 22, i64* %__dnew.i.i.i.i84, align 8, !tbaa !15
+  %call5.i.i.i.i89 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp3, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i84, i64 0) #2
+  %_M_p.i13.i.i.i.i90 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i89, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !59
+  %29 = load i64, i64* %__dnew.i.i.i.i84, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i91 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 2, i32 0
+  store i64 %29, i64* %_M_allocated_capacity.i.i.i.i.i91, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i89, i8* nonnull getelementptr inbounds ([23 x i8], [23 x i8]* @.str.25, i64 0, i64 0), i64 22, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i97 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp3, i64 0, i32 1
+  store i64 %29, i64* %_M_string_length.i.i.i.i.i.i97, align 8, !tbaa !56
+  %30 = load i8*, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !59
+  %arrayidx.i.i.i.i.i98 = getelementptr inbounds i8, i8* %30, i64 %29
+  store i8 0, i8* %arrayidx.i.i.i.i.i98, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %28) #2
+  %31 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56, !noalias !68
+  %32 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59, !noalias !68
+  %call3.i.i.i102 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp3, i64 0, i64 0, i8* %32, i64 %31) #2, !noalias !68
+  %33 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 2
+  %34 = bitcast %"class.std::__cxx11::basic_string"* %conv1_w_path to %union.anon**
+  store %union.anon* %33, %union.anon** %34, align 8, !tbaa !55, !alias.scope !68
+  %_M_p.i.i23.i.i103 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 0, i32 0
+  %35 = load i8*, i8** %_M_p.i.i23.i.i103, align 8, !tbaa !59
+  %36 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 2
+  %arraydecay.i.i.i.i104 = bitcast %union.anon* %36 to i8*
+  %cmp.i.i.i105 = icmp eq i8* %35, %arraydecay.i.i.i.i104
+  br i1 %cmp.i.i.i105, label %if.then.i.i107, label %if.else.i.i111
+
+if.then.i.i107:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+  %arraydecay.i.i.i106 = bitcast %union.anon* %33 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i106, i8* %35, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+
+if.else.i.i111:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit78
+  %_M_p.i21.i.i108 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 0, i32 0
+  store i8* %35, i8** %_M_p.i21.i.i108, align 8, !tbaa !59, !alias.scope !68
+  %_M_allocated_capacity.i.i109 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 2, i32 0
+  %37 = load i64, i64* %_M_allocated_capacity.i.i109, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i110 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 2, i32 0
+  store i64 %37, i64* %_M_allocated_capacity.i.i.i110, align 8, !tbaa !15, !alias.scope !68
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114: ; preds = %if.else.i.i111, %if.then.i.i107
+  %_M_string_length.i20.i.i112 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i102, i64 0, i32 1
+  %38 = load i64, i64* %_M_string_length.i20.i.i112, align 8, !tbaa !56
+  %_M_string_length.i.i2.i113 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 1
+  store i64 %38, i64* %_M_string_length.i.i2.i113, align 8, !tbaa !56, !alias.scope !68
+  %39 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i102 to %union.anon**
+  store %union.anon* %36, %union.anon** %39, align 8, !tbaa !59
+  store i64 0, i64* %_M_string_length.i20.i.i112, align 8, !tbaa !56
+  store i8 0, i8* %arraydecay.i.i.i.i104, align 1, !tbaa !39
+  %40 = load i8*, i8** %_M_p.i13.i.i.i.i90, align 8, !tbaa !59
+  %cmp.i.i.i117 = icmp eq i8* %40, %27
+  br i1 %cmp.i.i.i117, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119, label %if.then.i.i118
+
+if.then.i.i118:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+  call void @_ZdlPv(i8* %40) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119: ; preds = %if.then.i.i118, %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit114
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %24) #2
+  %41 = bitcast %"class.std::__cxx11::basic_string"* %conv1_b_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %41) #2
+  %42 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp5 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %42) #2
+  %43 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 2
+  %44 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp5 to %union.anon**
+  store %union.anon* %43, %union.anon** %44, align 8, !tbaa !55
+  %45 = bitcast %union.anon* %43 to i8*
+  %46 = bitcast i64* %__dnew.i.i.i.i125 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %46) #2
+  store i64 27, i64* %__dnew.i.i.i.i125, align 8, !tbaa !15
+  %call5.i.i.i.i130 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp5, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i125, i64 0) #2
+  %_M_p.i13.i.i.i.i131 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i130, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !59
+  %47 = load i64, i64* %__dnew.i.i.i.i125, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i132 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 2, i32 0
+  store i64 %47, i64* %_M_allocated_capacity.i.i.i.i.i132, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i130, i8* nonnull getelementptr inbounds ([28 x i8], [28 x i8]* @.str.26, i64 0, i64 0), i64 27, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i138 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp5, i64 0, i32 1
+  store i64 %47, i64* %_M_string_length.i.i.i.i.i.i138, align 8, !tbaa !56
+  %48 = load i8*, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !59
+  %arrayidx.i.i.i.i.i139 = getelementptr inbounds i8, i8* %48, i64 %47
+  store i8 0, i8* %arrayidx.i.i.i.i.i139, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %46) #2
+  %49 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56, !noalias !71
+  %50 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59, !noalias !71
+  %call3.i.i.i143 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp5, i64 0, i64 0, i8* %50, i64 %49) #2, !noalias !71
+  %51 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 2
+  %52 = bitcast %"class.std::__cxx11::basic_string"* %conv1_b_path to %union.anon**
+  store %union.anon* %51, %union.anon** %52, align 8, !tbaa !55, !alias.scope !71
+  %_M_p.i.i23.i.i144 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 0, i32 0
+  %53 = load i8*, i8** %_M_p.i.i23.i.i144, align 8, !tbaa !59
+  %54 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 2
+  %arraydecay.i.i.i.i145 = bitcast %union.anon* %54 to i8*
+  %cmp.i.i.i146 = icmp eq i8* %53, %arraydecay.i.i.i.i145
+  br i1 %cmp.i.i.i146, label %if.then.i.i148, label %if.else.i.i152
+
+if.then.i.i148:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+  %arraydecay.i.i.i147 = bitcast %union.anon* %51 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i147, i8* %53, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+
+if.else.i.i152:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit119
+  %_M_p.i21.i.i149 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 0, i32 0
+  store i8* %53, i8** %_M_p.i21.i.i149, align 8, !tbaa !59, !alias.scope !71
+  %_M_allocated_capacity.i.i150 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 2, i32 0
+  %55 = load i64, i64* %_M_allocated_capacity.i.i150, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i151 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 2, i32 0
+  store i64 %55, i64* %_M_allocated_capacity.i.i.i151, align 8, !tbaa !15, !alias.scope !71
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155: ; preds = %if.else.i.i152, %if.then.i.i148
+  %_M_string_length.i20.i.i153 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i143, i64 0, i32 1
+  %56 = load i64, i64* %_M_string_length.i20.i.i153, align 8, !tbaa !56
+  %_M_string_length.i.i2.i154 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 1
+  store i64 %56, i64* %_M_string_length.i.i2.i154, align 8, !tbaa !56, !alias.scope !71
+  %57 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i143 to %union.anon**
+  store %union.anon* %54, %union.anon** %57, align 8, !tbaa !59
+  store i64 0, i64* %_M_string_length.i20.i.i153, align 8, !tbaa !56
+  store i8 0, i8* %arraydecay.i.i.i.i145, align 1, !tbaa !39
+  %58 = load i8*, i8** %_M_p.i13.i.i.i.i131, align 8, !tbaa !59
+  %cmp.i.i.i158 = icmp eq i8* %58, %45
+  br i1 %cmp.i.i.i158, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160, label %if.then.i.i159
+
+if.then.i.i159:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+  call void @_ZdlPv(i8* %58) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160: ; preds = %if.then.i.i159, %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit155
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %42) #2
+  %59 = bitcast %"class.std::__cxx11::basic_string"* %conv2_w_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %59) #2
+  %60 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp7 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %60) #2
+  %61 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 2
+  %62 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp7 to %union.anon**
+  store %union.anon* %61, %union.anon** %62, align 8, !tbaa !55
+  %63 = bitcast %union.anon* %61 to i8*
+  %64 = bitcast i64* %__dnew.i.i.i.i166 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %64) #2
+  store i64 22, i64* %__dnew.i.i.i.i166, align 8, !tbaa !15
+  %call5.i.i.i.i171 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp7, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i166, i64 0) #2
+  %_M_p.i13.i.i.i.i172 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i171, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !59
+  %65 = load i64, i64* %__dnew.i.i.i.i166, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i173 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 2, i32 0
+  store i64 %65, i64* %_M_allocated_capacity.i.i.i.i.i173, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i171, i8* nonnull getelementptr inbounds ([23 x i8], [23 x i8]* @.str.27, i64 0, i64 0), i64 22, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i179 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp7, i64 0, i32 1
+  store i64 %65, i64* %_M_string_length.i.i.i.i.i.i179, align 8, !tbaa !56
+  %66 = load i8*, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !59
+  %arrayidx.i.i.i.i.i180 = getelementptr inbounds i8, i8* %66, i64 %65
+  store i8 0, i8* %arrayidx.i.i.i.i.i180, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %64) #2
+  %67 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56, !noalias !74
+  %68 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59, !noalias !74
+  %call3.i.i.i184 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp7, i64 0, i64 0, i8* %68, i64 %67) #2, !noalias !74
+  %69 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 2
+  %70 = bitcast %"class.std::__cxx11::basic_string"* %conv2_w_path to %union.anon**
+  store %union.anon* %69, %union.anon** %70, align 8, !tbaa !55, !alias.scope !74
+  %_M_p.i.i23.i.i185 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 0, i32 0
+  %71 = load i8*, i8** %_M_p.i.i23.i.i185, align 8, !tbaa !59
+  %72 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 2
+  %arraydecay.i.i.i.i186 = bitcast %union.anon* %72 to i8*
+  %cmp.i.i.i187 = icmp eq i8* %71, %arraydecay.i.i.i.i186
+  br i1 %cmp.i.i.i187, label %if.then.i.i189, label %if.else.i.i193
+
+if.then.i.i189:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+  %arraydecay.i.i.i188 = bitcast %union.anon* %69 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i188, i8* %71, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+
+if.else.i.i193:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit160
+  %_M_p.i21.i.i190 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 0, i32 0
+  store i8* %71, i8** %_M_p.i21.i.i190, align 8, !tbaa !59, !alias.scope !74
+  %_M_allocated_capacity.i.i191 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 2, i32 0
+  %73 = load i64, i64* %_M_allocated_capacity.i.i191, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i192 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 2, i32 0
+  store i64 %73, i64* %_M_allocated_capacity.i.i.i192, align 8, !tbaa !15, !alias.scope !74
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196: ; preds = %if.else.i.i193, %if.then.i.i189
+  %_M_string_length.i20.i.i194 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i184, i64 0, i32 1
+  %74 = load i64, i64* %_M_string_length.i20.i.i194, align 8, !tbaa !56
+  %_M_string_length.i.i2.i195 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 1
+  store i64 %74, i64* %_M_string_length.i.i2.i195, align 8, !tbaa !56, !alias.scope !74
+  %75 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i184 to %union.anon**
+  store %union.anon* %72, %union.anon** %75, align 8, !tbaa !59
+  store i64 0, i64* %_M_string_length.i20.i.i194, align 8, !tbaa !56
+  store i8 0, i8* %arraydecay.i.i.i.i186, align 1, !tbaa !39
+  %76 = load i8*, i8** %_M_p.i13.i.i.i.i172, align 8, !tbaa !59
+  %cmp.i.i.i199 = icmp eq i8* %76, %63
+  br i1 %cmp.i.i.i199, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201, label %if.then.i.i200
+
+if.then.i.i200:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+  call void @_ZdlPv(i8* %76) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201: ; preds = %if.then.i.i200, %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit196
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %60) #2
+  %77 = bitcast %"class.std::__cxx11::basic_string"* %conv2_b_path to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %77) #2
+  %78 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp9 to i8*
+  call void @llvm.lifetime.start(i64 32, i8* nonnull %78) #2
+  %79 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 2
+  %80 = bitcast %"class.std::__cxx11::basic_string"* %ref.tmp9 to %union.anon**
+  store %union.anon* %79, %union.anon** %80, align 8, !tbaa !55
+  %81 = bitcast %union.anon* %79 to i8*
+  %82 = bitcast i64* %__dnew.i.i.i.i207 to i8*
+  call void @llvm.lifetime.start(i64 8, i8* nonnull %82) #2
+  store i64 27, i64* %__dnew.i.i.i.i207, align 8, !tbaa !15
+  %call5.i.i.i.i212 = call i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp9, i64* nonnull dereferenceable(8) %__dnew.i.i.i.i207, i64 0) #2
+  %_M_p.i13.i.i.i.i213 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 0, i32 0
+  store i8* %call5.i.i.i.i212, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !59
+  %83 = load i64, i64* %__dnew.i.i.i.i207, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i.i.i214 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 2, i32 0
+  store i64 %83, i64* %_M_allocated_capacity.i.i.i.i.i214, align 8, !tbaa !15
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call5.i.i.i.i212, i8* nonnull getelementptr inbounds ([28 x i8], [28 x i8]* @.str.28, i64 0, i64 0), i64 27, i32 1, i1 false) #2
+  %_M_string_length.i.i.i.i.i.i220 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %ref.tmp9, i64 0, i32 1
+  store i64 %83, i64* %_M_string_length.i.i.i.i.i.i220, align 8, !tbaa !56
+  %84 = load i8*, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !59
+  %arrayidx.i.i.i.i.i221 = getelementptr inbounds i8, i8* %84, i64 %83
+  store i8 0, i8* %arrayidx.i.i.i.i.i221, align 1, !tbaa !39
+  call void @llvm.lifetime.end(i64 8, i8* nonnull %82) #2
+  %85 = load i64, i64* %_M_string_length.i.i.i.i.i.i, align 8, !tbaa !56, !noalias !77
+  %86 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59, !noalias !77
+  %call3.i.i.i225 = call dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"* nonnull %ref.tmp9, i64 0, i64 0, i8* %86, i64 %85) #2, !noalias !77
+  %87 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 2
+  %88 = bitcast %"class.std::__cxx11::basic_string"* %conv2_b_path to %union.anon**
+  store %union.anon* %87, %union.anon** %88, align 8, !tbaa !55, !alias.scope !77
+  %_M_p.i.i23.i.i226 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 0, i32 0
+  %89 = load i8*, i8** %_M_p.i.i23.i.i226, align 8, !tbaa !59
+  %90 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 2
+  %arraydecay.i.i.i.i227 = bitcast %union.anon* %90 to i8*
+  %cmp.i.i.i228 = icmp eq i8* %89, %arraydecay.i.i.i.i227
+  br i1 %cmp.i.i.i228, label %if.then.i.i230, label %if.else.i.i234
+
+if.then.i.i230:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+  %arraydecay.i.i.i229 = bitcast %union.anon* %87 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay.i.i.i229, i8* %89, i64 16, i32 1, i1 false) #2
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+
+if.else.i.i234:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit201
+  %_M_p.i21.i.i231 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 0, i32 0
+  store i8* %89, i8** %_M_p.i21.i.i231, align 8, !tbaa !59, !alias.scope !77
+  %_M_allocated_capacity.i.i232 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 2, i32 0
+  %91 = load i64, i64* %_M_allocated_capacity.i.i232, align 8, !tbaa !15
+  %_M_allocated_capacity.i.i.i233 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 2, i32 0
+  store i64 %91, i64* %_M_allocated_capacity.i.i.i233, align 8, !tbaa !15, !alias.scope !77
+  br label %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+
+_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237: ; preds = %if.else.i.i234, %if.then.i.i230
+  %_M_string_length.i20.i.i235 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %call3.i.i.i225, i64 0, i32 1
+  %92 = load i64, i64* %_M_string_length.i20.i.i235, align 8, !tbaa !56
+  %_M_string_length.i.i2.i236 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 1
+  store i64 %92, i64* %_M_string_length.i.i2.i236, align 8, !tbaa !56, !alias.scope !77
+  %93 = bitcast %"class.std::__cxx11::basic_string"* %call3.i.i.i225 to %union.anon**
+  store %union.anon* %90, %union.anon** %93, align 8, !tbaa !59
+  store i64 0, i64* %_M_string_length.i20.i.i235, align 8, !tbaa !56
+  store i8 0, i8* %arraydecay.i.i.i.i227, align 1, !tbaa !39
+  %94 = load i8*, i8** %_M_p.i13.i.i.i.i213, align 8, !tbaa !59
+  %cmp.i.i.i240 = icmp eq i8* %94, %81
+  br i1 %cmp.i.i.i240, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242, label %if.then.i.i241
+
+if.then.i.i241:                                   ; preds = %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+  call void @_ZdlPv(i8* %94) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242: ; preds = %if.then.i.i241, %_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_.exit237
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %78) #2
+  %_M_p.i.i = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %input_data_path, i64 0, i32 0, i32 0
+  %95 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.29, i64 0, i64 0), i8* %95)
+  %96 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %call13 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %96, i32 0, i32 1000, i32 1, i32 28, i32 28)
+  %_M_p.i.i245 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_w_path, i64 0, i32 0, i32 0
+  %97 = load i8*, i8** %_M_p.i.i245, align 8, !tbaa !59
+  %call15 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %97, i32 0, i32 32, i32 1, i32 5, i32 5)
+  %_M_p.i.i247 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv1_b_path, i64 0, i32 0, i32 0
+  %98 = load i8*, i8** %_M_p.i.i247, align 8, !tbaa !59
+  %call17 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %98, i32 0, i32 1, i32 32, i32 1, i32 1)
+  %_M_p.i.i246 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_w_path, i64 0, i32 0, i32 0
+  %99 = load i8*, i8** %_M_p.i.i246, align 8, !tbaa !59
+  %call19 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %99, i32 0, i32 64, i32 32, i32 5, i32 5)
+  %_M_p.i.i244 = getelementptr inbounds %"class.std::__cxx11::basic_string", %"class.std::__cxx11::basic_string"* %conv2_b_path, i64 0, i32 0, i32 0
+  %100 = load i8*, i8** %_M_p.i.i244, align 8, !tbaa !59
+  %call21 = call %struct.Tensor* @_Z18readTrainedWeightsPKciiiii(i8* %100, i32 0, i32 1, i32 64, i32 1, i32 1)
+  call void @llvm.visc.init()
+  %call22 = call noalias i8* @malloc(i64 96) #2
+  %x23 = bitcast i8* %call22 to i8**
+  %101 = bitcast i8* %call22 to %struct.Tensor**
+  store %struct.Tensor* %call13, %struct.Tensor** %101, align 1, !tbaa !80
+  %x_bytes = getelementptr inbounds i8, i8* %call22, i64 8
+  %102 = bitcast i8* %x_bytes to i64*
+  store i64 0, i64* %102, align 1, !tbaa !83
+  %conv1_w24 = getelementptr inbounds i8, i8* %call22, i64 16
+  %103 = bitcast i8* %conv1_w24 to %struct.Tensor**
+  store %struct.Tensor* %call15, %struct.Tensor** %103, align 1, !tbaa !84
+  %conv1_w_bytes = getelementptr inbounds i8, i8* %call22, i64 24
+  %104 = bitcast i8* %conv1_w_bytes to i64*
+  store i64 0, i64* %104, align 1, !tbaa !85
+  %conv1_b25 = getelementptr inbounds i8, i8* %call22, i64 32
+  %105 = bitcast i8* %conv1_b25 to %struct.Tensor**
+  store %struct.Tensor* %call17, %struct.Tensor** %105, align 1, !tbaa !86
+  %conv1_b_bytes = getelementptr inbounds i8, i8* %call22, i64 40
+  %106 = bitcast i8* %conv1_b_bytes to i64*
+  store i64 0, i64* %106, align 1, !tbaa !87
+  %conv2_w26 = getelementptr inbounds i8, i8* %call22, i64 48
+  %107 = bitcast i8* %conv2_w26 to %struct.Tensor**
+  store %struct.Tensor* %call19, %struct.Tensor** %107, align 1, !tbaa !88
+  %conv2_w_bytes = getelementptr inbounds i8, i8* %call22, i64 56
+  %108 = bitcast i8* %conv2_w_bytes to i64*
+  store i64 0, i64* %108, align 1, !tbaa !89
+  %conv2_b27 = getelementptr inbounds i8, i8* %call22, i64 64
+  %109 = bitcast i8* %conv2_b27 to %struct.Tensor**
+  store %struct.Tensor* %call21, %struct.Tensor** %109, align 1, !tbaa !90
+  %conv2_b_bytes = getelementptr inbounds i8, i8* %call22, i64 72
+  %110 = bitcast i8* %conv2_b_bytes to i64*
+  store i64 0, i64* %110, align 1, !tbaa !91
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned to i8*), i8* %call22, i1 false)
+  call void @llvm.visc.wait(i8* %graphID)
+  %111 = load i8*, i8** %x23, align 1, !tbaa !80
+  call void @hpvm_request_tensor(i8* %111, i32 0) #2
+  call void @llvm.visc.cleanup()
+  %112 = load i8*, i8** %_M_p.i.i244, align 8, !tbaa !59
+  %arraydecay.i.i.i.i203 = bitcast %union.anon* %87 to i8*
+  %cmp.i.i.i204 = icmp eq i8* %112, %arraydecay.i.i.i.i203
+  br i1 %cmp.i.i.i204, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206, label %if.then.i.i205
+
+if.then.i.i205:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242
+  call void @_ZdlPv(i8* %112) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206: ; preds = %if.then.i.i205, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit242
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %77) #2
+  %113 = load i8*, i8** %_M_p.i.i246, align 8, !tbaa !59
+  %arraydecay.i.i.i.i162 = bitcast %union.anon* %69 to i8*
+  %cmp.i.i.i163 = icmp eq i8* %113, %arraydecay.i.i.i.i162
+  br i1 %cmp.i.i.i163, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165, label %if.then.i.i164
+
+if.then.i.i164:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206
+  call void @_ZdlPv(i8* %113) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165: ; preds = %if.then.i.i164, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit206
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %59) #2
+  %114 = load i8*, i8** %_M_p.i.i247, align 8, !tbaa !59
+  %arraydecay.i.i.i.i121 = bitcast %union.anon* %51 to i8*
+  %cmp.i.i.i122 = icmp eq i8* %114, %arraydecay.i.i.i.i121
+  br i1 %cmp.i.i.i122, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124, label %if.then.i.i123
+
+if.then.i.i123:                                   ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165
+  call void @_ZdlPv(i8* %114) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124: ; preds = %if.then.i.i123, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit165
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %41) #2
+  %115 = load i8*, i8** %_M_p.i.i245, align 8, !tbaa !59
+  %arraydecay.i.i.i.i80 = bitcast %union.anon* %33 to i8*
+  %cmp.i.i.i81 = icmp eq i8* %115, %arraydecay.i.i.i.i80
+  br i1 %cmp.i.i.i81, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83, label %if.then.i.i82
+
+if.then.i.i82:                                    ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124
+  call void @_ZdlPv(i8* %115) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83: ; preds = %if.then.i.i82, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit124
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %23) #2
+  %116 = load i8*, i8** %_M_p.i.i, align 8, !tbaa !59
+  %arraydecay.i.i.i.i51 = bitcast %union.anon* %15 to i8*
+  %cmp.i.i.i52 = icmp eq i8* %116, %arraydecay.i.i.i.i51
+  br i1 %cmp.i.i.i52, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54, label %if.then.i.i53
+
+if.then.i.i53:                                    ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83
+  call void @_ZdlPv(i8* %116) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54: ; preds = %if.then.i.i53, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit83
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %6) #2
+  %117 = load i8*, i8** %_M_p.i13.i.i.i.i, align 8, !tbaa !59
+  %cmp.i.i.i = icmp eq i8* %117, %3
+  br i1 %cmp.i.i.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54
+  call void @_ZdlPv(i8* %117) #2
+  br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit
+
+_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit: ; preds = %if.then.i.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit54
+  call void @llvm.lifetime.end(i64 32, i8* nonnull %0) #2
+  ret i32 0
+}
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdlPv(i8*) local_unnamed_addr #7
+
+declare void @_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E(%"class.std::basic_ios"*, %"class.std::basic_streambuf"*) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_baseC2Ev(%"class.std::ios_base"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt6localeC1Ev(%"class.std::locale"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt6localeD1Ev(%"class.std::locale"*) unnamed_addr #1
+
+; Function Attrs: nounwind
+declare void @_ZNSt8ios_baseD2Ev(%"class.std::ios_base"*) unnamed_addr #1
+
+declare dereferenceable(272) %"class.std::basic_ostream"* @_ZNSo9_M_insertIdEERSoT_(%"class.std::basic_ostream"*, double) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+declare void @_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv(%"class.std::__cxx11::basic_string"* noalias sret, %"class.std::__cxx11::basic_stringbuf"*) local_unnamed_addr #3 align 2
+
+declare i8* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(%"class.std::__cxx11::basic_string"*, i64* dereferenceable(8), i64) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #4
+
+declare dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm(%"class.std::__cxx11::basic_string"*, i64, i64, i8*, i64) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define internal void @_GLOBAL__sub_I_lenet.cpp() #3 section ".text.startup" {
+entry:                                            ; static initializer: sets up the std::ios_base::Init object @_ZStL8__ioinit
+  tail call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull @_ZStL8__ioinit) #2 ; std::ios_base::Init constructor
+  %0 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i64 0, i32 0), i8* nonnull @__dso_handle) #2 ; registers the matching destructor via __cxa_atexit; result %0 is ignored
+  ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #2
+
+declare i32 @putchar(i32)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #2
+
+; Function Attrs: nounwind uwtable
+define %struct.out._Z14tensorConvNodePvmS_m @_Z14tensorConvNodePvmS_m_cloned(i8* in %t1, i64 %bytes1, i8* in %t2, i64 %bytes2) #3 {
+entry:                                            ; convolution leaf node; %bytes1 and %bytes2 are not used in the body
+  %call1 = call i8* @llvm.visc.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) ; same 2, 2, 1, 1 arguments as the call in lenet.cpp (presumably padding/stride - confirm)
+  %returnStruct = insertvalue %struct.out._Z14tensorConvNodePvmS_m undef, i8* %call1, 0 ; field 0 = convolution result pointer
+  %returnStruct2 = insertvalue %struct.out._Z14tensorConvNodePvmS_m %returnStruct, i64 0, 1 ; field 1 = constant 0
+  ret %struct.out._Z14tensorConvNodePvmS_m %returnStruct2
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.tensor.add(i8*, i8*) #2
+
+; Function Attrs: nounwind uwtable
+define %struct.out._Z13tensorAddNodePvmS_m @_Z13tensorAddNodePvmS_m_cloned(i8* in %t1, i64 %bytest1, i8* in %t2, i64 %bytest2) #3 {
+entry:                                            ; tensor-add leaf node; %bytest1 and %bytest2 are not used in the body
+  %call1 = call i8* @llvm.visc.tensor.add(i8* %t1, i8* %t2)
+  %returnStruct = insertvalue %struct.out._Z13tensorAddNodePvmS_m undef, i8* %call1, 0 ; field 0 = add result pointer
+  %returnStruct2 = insertvalue %struct.out._Z13tensorAddNodePvmS_m %returnStruct, i64 0, 1 ; field 1 = constant 0
+  ret %struct.out._Z13tensorAddNodePvmS_m %returnStruct2
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createNode(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.input(i8*, i32, i32, i1) #2
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1) #2
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.output(i8*, i32, i32, i1) #2
+
+; Function Attrs: nounwind uwtable
+define %struct.out._Z4rootPvmS_mS_mS_mS_m @_Z4rootPvmS_mS_mS_mS_m_cloned(i8* in %x, i64 %x_bytes, i8* in %conv1_w, i64 %conv1_w_bytes, i8* in %conv1_b, i64 %conv1_b_bytes, i8* in %conv2_w, i64 %conv2_w_bytes, i8* in %conv2_b, i64 %conv2_b_bytes) #3 {
+entry:                                            ; graph root: creates the conv and add nodes and wires them; arguments 6-9 (%conv2_*) are never bound
+  %_Z14tensorConvNodePvmS_m_cloned.node = call i8* @llvm.visc.createNode(i8* bitcast (%struct.out._Z14tensorConvNodePvmS_m (i8*, i64, i8*, i64)* @_Z14tensorConvNodePvmS_m_cloned to i8*))
+  %_Z13tensorAddNodePvmS_m_cloned.node = call i8* @llvm.visc.createNode(i8* bitcast (%struct.out._Z13tensorAddNodePvmS_m (i8*, i64, i8*, i64)* @_Z13tensorAddNodePvmS_m_cloned to i8*))
+  call void @llvm.visc.bind.input(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i32 0, i32 0, i1 false) ; root arg 0 (%x) -> conv arg 0 (%t1)
+  call void @llvm.visc.bind.input(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i32 1, i32 1, i1 false) ; root arg 1 (%x_bytes) -> conv arg 1 (%bytes1)
+  call void @llvm.visc.bind.input(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i32 2, i32 2, i1 false) ; root arg 2 (%conv1_w) -> conv arg 2 (%t2)
+  call void @llvm.visc.bind.input(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i32 3, i32 3, i1 false) ; root arg 3 (%conv1_w_bytes) -> conv arg 3 (%bytes2)
+  %output = call i8* @llvm.visc.createEdge(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i8* %_Z13tensorAddNodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) ; conv output 0 -> add arg 0 (%t1)
+  %output1 = call i8* @llvm.visc.createEdge(i8* %_Z14tensorConvNodePvmS_m_cloned.node, i8* %_Z13tensorAddNodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) ; conv output 1 -> add arg 1 (%bytest1)
+  call void @llvm.visc.bind.input(i8* %_Z13tensorAddNodePvmS_m_cloned.node, i32 4, i32 2, i1 false) ; root arg 4 (%conv1_b) -> add arg 2 (%t2)
+  call void @llvm.visc.bind.input(i8* %_Z13tensorAddNodePvmS_m_cloned.node, i32 5, i32 3, i1 false) ; root arg 5 (%conv1_b_bytes) -> add arg 3 (%bytest2)
+  call void @llvm.visc.bind.output(i8* %_Z13tensorAddNodePvmS_m_cloned.node, i32 0, i32 0, i1 false) ; add output 0 -> root output 0
+  call void @llvm.visc.bind.output(i8* %_Z13tensorAddNodePvmS_m_cloned.node, i32 1, i32 1, i1 false) ; add output 1 -> root output 1
+  ret %struct.out._Z4rootPvmS_mS_mS_mS_m undef   ; returns undef; outputs are declared through the bind.output calls above
+}
+
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #2
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.launch(i8*, i8*, i1) #2
+
+; Function Attrs: nounwind
+declare void @llvm.visc.wait(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #2
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { argmemonly nounwind }
+attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #6 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #7 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #8 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+!visc_hint_cudnn = !{!1, !2}
+!visc_hint_gpu = !{}
+!visc_hint_spir = !{}
+!visc_hint_promise = !{}
+!visc_hint_cpu = !{!3}
+!visc_hint_cpu_gpu = !{}
+!visc_hint_cpu_spir = !{}
+
+!0 = !{!"clang version 4.0.1 "}
+!1 = !{%struct.out._Z14tensorConvNodePvmS_m (i8*, i64, i8*, i64)* @_Z14tensorConvNodePvmS_m_cloned}
+!2 = !{%struct.out._Z13tensorAddNodePvmS_m (i8*, i64, i8*, i64)* @_Z13tensorAddNodePvmS_m_cloned}
+!3 = !{%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned}
+!4 = !{!5, !10, i64 40}
+!5 = !{!"_ZTS6Tensor", !6, i64 0, !6, i64 4, !9, i64 8, !10, i64 16, !10, i64 24, !10, i64 32, !10, i64 40, !11, i64 48, !11, i64 56, !12, i64 64}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C++ TBAA"}
+!9 = !{!"_ZTS15data_location_t", !7, i64 0}
+!10 = !{!"any pointer", !7, i64 0}
+!11 = !{!"long", !7, i64 0}
+!12 = !{!"_ZTS9Dimension", !6, i64 0, !10, i64 8}
+!13 = !{!5, !6, i64 64}
+!14 = !{!5, !10, i64 72}
+!15 = !{!11, !11, i64 0}
+!16 = !{!5, !11, i64 48}
+!17 = !{!5, !11, i64 56}
+!18 = !{!5, !10, i64 32}
+!19 = !{!5, !6, i64 0}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"float", !7, i64 0}
+!22 = distinct !{!22, !23}
+!23 = !{!"llvm.loop.unroll.disable"}
+!24 = distinct !{!24, !25, !26}
+!25 = !{!"llvm.loop.vectorize.width", i32 1}
+!26 = !{!"llvm.loop.interleave.count", i32 1}
+!27 = distinct !{!27, !25, !26}
+!28 = distinct !{!28, !23}
+!29 = distinct !{!29, !25, !26}
+!30 = distinct !{!30, !23}
+!31 = distinct !{!31, !25, !26}
+!32 = distinct !{!32, !25, !26}
+!33 = distinct !{!33, !25, !26}
+!34 = distinct !{!34, !23}
+!35 = distinct !{!35, !25, !26}
+!36 = distinct !{!36, !25, !26}
+!37 = distinct !{!37, !25, !26}
+!38 = distinct !{!38, !25, !26}
+!39 = !{!7, !7, i64 0}
+!40 = distinct !{!40, !25, !26}
+!41 = distinct !{!41, !42, !25, !26}
+!42 = !{!"llvm.loop.unroll.runtime.disable"}
+!43 = !{!44, !44, i64 0}
+!44 = !{!"vtable pointer", !8, i64 0}
+!45 = !{!46, !10, i64 216}
+!46 = !{!"_ZTSSt9basic_iosIcSt11char_traitsIcEE", !10, i64 216, !7, i64 224, !47, i64 225, !10, i64 232, !10, i64 240, !10, i64 248, !10, i64 256}
+!47 = !{!"bool", !7, i64 0}
+!48 = !{!46, !7, i64 224}
+!49 = !{!46, !47, i64 225}
+!50 = !{!51, !52, i64 64}
+!51 = !{!"_ZTSNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE", !52, i64 64, !53, i64 72}
+!52 = !{!"_ZTSSt13_Ios_Openmode", !7, i64 0}
+!53 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE", !54, i64 0, !11, i64 8, !7, i64 16}
+!54 = !{!"_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderE", !10, i64 0}
+!55 = !{!54, !10, i64 0}
+!56 = !{!53, !11, i64 8}
+!57 = !{!58, !58, i64 0}
+!58 = !{!"_ZTSSt13_Ios_Fmtflags", !7, i64 0}
+!59 = !{!53, !10, i64 0}
+!60 = !{!61, !58, i64 24}
+!61 = !{!"_ZTSSt8ios_base", !11, i64 8, !11, i64 16, !58, i64 24, !62, i64 28, !62, i64 32, !10, i64 40, !63, i64 48, !7, i64 64, !6, i64 192, !10, i64 200, !64, i64 208}
+!62 = !{!"_ZTSSt12_Ios_Iostate", !7, i64 0}
+!63 = !{!"_ZTSNSt8ios_base6_WordsE", !10, i64 0, !11, i64 8}
+!64 = !{!"_ZTSSt6locale", !10, i64 0}
+!65 = !{!66}
+!66 = distinct !{!66, !67, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!67 = distinct !{!67, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!68 = !{!69}
+!69 = distinct !{!69, !70, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!70 = distinct !{!70, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!71 = !{!72}
+!72 = distinct !{!72, !73, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!73 = distinct !{!73, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!74 = !{!75}
+!75 = distinct !{!75, !76, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!76 = distinct !{!76, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!77 = !{!78}
+!78 = distinct !{!78, !79, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_: %agg.result"}
+!79 = distinct !{!79, !"_ZStplIcSt11char_traitsIcESaIcEENSt7__cxx1112basic_stringIT_T0_T1_EERKS8_OS8_"}
+!80 = !{!81, !10, i64 0}
+!81 = !{!"_ZTS6RootIn", !10, i64 0, !11, i64 8, !10, i64 16, !11, i64 24, !10, i64 32, !11, i64 40, !10, i64 48, !11, i64 56, !10, i64 64, !11, i64 72, !82, i64 80}
+!82 = !{!"_ZTS5ret_t", !10, i64 0, !11, i64 8}
+!83 = !{!81, !11, i64 8}
+!84 = !{!81, !10, i64 16}
+!85 = !{!81, !11, i64 24}
+!86 = !{!81, !10, i64 32}
+!87 = !{!81, !11, i64 40}
+!88 = !{!81, !10, i64 48}
+!89 = !{!81, !11, i64 56}
+!90 = !{!81, !10, i64 64}
+!91 = !{!81, !11, i64 72}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked
new file mode 100755
index 0000000000000000000000000000000000000000..83a7629171a5e4fa97becf2380e06c553098db7e
Binary files /dev/null and b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked differ
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked.bc b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked.bc
new file mode 100644
index 0000000000000000000000000000000000000000..214b0a7ae29ce6507062314e08fe7db8632afeff
Binary files /dev/null and b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/build/lenet_linked.bc differ
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/.#fcl.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/.#fcl.cpp
new file mode 120000
index 0000000000000000000000000000000000000000..2024cd179053642170a67ac93731296f36596129
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/.#fcl.cpp
@@ -0,0 +1 @@
+hsharif3@tyler.cs.illinois.edu.16991:1541049775
\ No newline at end of file
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e588db93bf3246722fe4a2ff7336a55ac542c21f
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp
@@ -0,0 +1,132 @@
+
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <cinttypes>
+#include <visc.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+using namespace std;
+// Dataflow node (CUDNN target): applies __visc__tensor_convolution to t1 and t2.
+void tensorConvNode(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+    // root() binds x to t1 and conv1_w to t2, so this computes conv(x, conv1_w).
+    // The trailing 2, 2, 1, 1 are presumably padding and stride -- TODO confirm.
+    void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+    __visc__return(2, r, (size_t) 0);
+}
+// Dataflow node (CUDNN target): applies __visc__tensor_add to t1 and t2.
+// root() wires the conv node's output tensor into t1 and conv1_b into t2.
+void tensorAddNode(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+    // Returns the pair (result tensor, byte count); the byte count is always 0 here.
+    void* r = __visc__tensor_add(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+// Root of the dataflow graph: a conv node on (x, conv1_w) feeding an add node with conv1_b.
+void root(void *x, size_t x_bytes,
+	  void *conv1_w, size_t conv1_w_bytes,
+	  void *conv1_b, size_t conv1_b_bytes,
+	  void *conv2_w, size_t conv2_w_bytes,
+	  void *conv2_b, size_t conv2_b_bytes){
+    // conv2_w / conv2_b are listed in the attributes but not bound to any node below.
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(5, x, conv1_w, conv1_b, conv2_w, conv2_b, 0);
+    // Instantiate one node per tensor operation.
+    void *nodeConv1 = __visc__createNodeND(0, tensorConvNode);
+    void *nodeAdd = __visc__createNodeND(0, tensorAddNode);
+    // Root args 0-3 (x, x_bytes, conv1_w, conv1_w_bytes) feed the conv node's four inputs.
+    // node, src, dst, stream
+    __visc__bindIn(nodeConv1, 0, 0, 0);
+    __visc__bindIn(nodeConv1, 1, 1, 0);
+    __visc__bindIn(nodeConv1, 2, 2, 0);
+    __visc__bindIn(nodeConv1, 3, 3, 0);
+    // Conv outputs 0 and 1 (tensor, bytes) become the add node's inputs 0 and 1.
+    // node, node, type, src, dst, stream
+    __visc__edge(nodeConv1, nodeAdd, 1, 0, 0, 0);
+    __visc__edge(nodeConv1, nodeAdd, 1, 1, 1, 0);
+    // Root args 4-5 (conv1_b, conv1_b_bytes) become the add node's inputs 2 and 3.
+    __visc__bindIn(nodeAdd, 4, 2, 0);
+    __visc__bindIn(nodeAdd, 5, 3, 0);
+    // The add node's two outputs are exposed as root's outputs 0 and 1.
+    __visc__bindOut(nodeAdd, 0, 0, 0);
+    __visc__bindOut(nodeAdd, 1, 1, 0);
+
+}
+// Return type for the nodes: the (tensor, bytes) pair that each node in this
+// file passes to __visc__return. RootIn embeds one as its final field `r`.
+// NOTE(review): the launch runtime presumably writes root's result there -- confirm.
+struct ret_t {
+    void *tensor;  // result tensor handle
+    size_t bytes;  // byte count; every node in this file returns 0 here
+};
+// Packed argument record passed to __visc__launch; fields mirror root()'s parameters in order.
+typedef struct __attribute__((__packed__)) {
+    void *x;
+    size_t x_bytes;
+    // First convolution layer: filter and bias tensors.
+    void *conv1_w;
+    size_t conv1_w_bytes;
+    void *conv1_b;
+    size_t conv1_b_bytes;
+    // Second convolution layer: filter and bias tensors (root() does not bind these).
+    void *conv2_w;
+    size_t conv2_w_bytes;
+    void *conv2_b;
+    size_t conv2_b_bytes;
+    // Result slot after the inputs; main() currently reads x instead (see its FIXME).
+    struct ret_t r;
+}
+RootIn;
+// Loads the MNIST input and the LeNet conv weights, runs the root graph, requests the result.
+int main() {
+    const int test_batch_size = 1000;
+    std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params";
+    std::string input_data_path = prefix + std::string("/FC_network2/mnist_float_input.bin");
+    std::string conv1_w_path = prefix + std::string("/lenet_keras/conv1.bin");
+    std::string conv1_b_path = prefix + std::string("/lenet_keras/conv1_bias.bin");
+    std::string conv2_w_path = prefix + std::string("/lenet_keras/conv2.bin");
+    std::string conv2_b_path = prefix + std::string("/lenet_keras/conv2_bias.bin");
+
+    printf("Reading Input Data from = %s \n", input_data_path.c_str());
+
+    void* x = readTrainedWeights(input_data_path.c_str(), float_type,
+                           test_batch_size, 1, 28, 28);
+    void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 32, 1, 5, 5);
+    void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 32, 1, 1);
+    void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 64, 32, 5, 5);
+    void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 64, 1, 1);
+
+    // Allocate the argument record before starting the runtime so that an
+    // allocation failure can return without any runtime teardown.
+    RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
+    if (args == nullptr) {
+        fprintf(stderr, "Failed to allocate RootIn arguments\n");
+        return 1;
+    }
+    memset(args, 0, sizeof(RootIn));  // zeroes every *_bytes field and the result slot r
+    args->x = x;
+    args->conv1_w = conv1_w;
+    args->conv1_b = conv1_b;
+    args->conv2_w = conv2_w;
+    args->conv2_b = conv2_b;
+
+    __visc__init();
+    void *dfg = __visc__launch(0, root, static_cast<void *>(args));
+    __visc__wait(dfg);
+
+    // FIXME: Value returned in the wrong index!!
+    // Intended read once fixed: void *r = args->r.tensor;
+    void *r = args->x;
+    hpvm_request_tensor(r, 0);
+
+    __visc__cleanup();
+    // The graph has completed and the runtime is shut down; release the argument record.
+    free(args);
+    return 0;
+}
+
+