diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
new file mode 100644
index 0000000000000000000000000000000000000000..69db224a413206727cbecc8a542dc0b353dfe6ff
--- /dev/null
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
@@ -0,0 +1,455 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
+; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
+; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
+; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: %t.bin
+; ModuleID = 'gemm_opencl.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@custom_str = private unnamed_addr constant [12 x i8] c"Value = %d\0A\00", align 1
+@hex_str = private unnamed_addr constant [14 x i8] c"Value = 0x%x\0A\00", align 1
+@ptr_str = private unnamed_addr constant [12 x i8] c"Value = %p\0A\00", align 1
+@.str = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
+@.str2 = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1
+@.str3 = private unnamed_addr constant [32 x i8] c"Accessing k = %d, A[%d], B[%d]\0A\00", align 1
+@str = private unnamed_addr constant [17 x i8] c"Entered function\00"
+@str10 = private unnamed_addr constant [16 x i8] c"Result computed\00"
+@str11 = private unnamed_addr constant [20 x i8] c"Result written to C\00"
+@str12 = private unnamed_addr constant [17 x i8] c"Output allocated\00"
+@str13 = private unnamed_addr constant [9 x i8] c"\0AFailed!\00"
+@str14 = private unnamed_addr constant [7 x i8] c"\0ADone!\00"
+@str15 = private unnamed_addr constant [7 x i8] c"\0APass!\00"
+
+; Function Attrs: nounwind uwtable
+define void @randomInit(float* nocapture %data, i32 %size) #0 { ; data[i] = (float)rand() * 2^-31 for i in [0, size); does nothing when size <= 0
+entry:
+  %cmp3 = icmp sgt i32 %size, 0
+  br i1 %cmp3, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %call = tail call i32 @rand() #5
+  %conv = sitofp i32 %call to float
+  %div = fmul float %conv, 0x3E00000000000000 ; the hex constant is 2^-31
+  %arrayidx = getelementptr inbounds float* %data, i64 %indvars.iv
+  store float %div, float* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @isEqual(float %a, float %b) #2 { ; returns 1 when fabsf(a - b), widened to double, is < 1e-3 (ordered compare), else 0
+entry:
+  %sub = fsub float %a, %b
+  %fabsf = tail call float @fabsf(float %sub) #6
+  %0 = fpext float %fabsf to double
+  %cmp = fcmp olt double %0, 1.000000e-03
+  %conv1 = zext i1 %cmp to i32
+  ret i32 %conv1
+}
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @checkResults(float* nocapture %A, float* nocapture %B, float* nocapture %C) #3 { ; recomputes every element of A*B (1024 x 1024, indexed [row * 1024 + col]) and compares it with C; returns 1 if all match, 0 at the first mismatch
+entry:
+  br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %entry, %for.inc50
+  %indvars.iv92 = phi i64 [ 0, %entry ], [ %indvars.iv.next93, %for.inc50 ]
+  %i.081 = phi i32 [ 0, %entry ], [ %inc51, %for.inc50 ]
+  %0 = shl nsw i64 %indvars.iv92, 10 ; i * 1024
+  br label %for.body7
+
+for.cond4: ; preds = %for.end
+  %inc48 = add nsw i32 %j.079, 1
+  %1 = trunc i64 %indvars.iv.next89 to i32
+  %cmp5 = icmp slt i32 %1, 1024
+  br i1 %cmp5, label %for.body7, label %for.inc50
+
+for.body7: ; preds = %for.cond4.preheader, %for.cond4
+  %indvars.iv88 = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next89, %for.cond4 ]
+  %j.079 = phi i32 [ 0, %for.cond4.preheader ], [ %inc48, %for.cond4 ]
+  %2 = add nsw i64 %indvars.iv88, %0 ; i * 1024 + j, the index into C
+  br label %for.body12
+
+for.body12: ; preds = %for.body12, %for.body7
+  %indvars.iv = phi i64 [ 0, %for.body7 ], [ %indvars.iv.next, %for.body12 ]
+  %3 = phi float [ 0.000000e+00, %for.body7 ], [ %add26, %for.body12 ]
+  %4 = add nsw i64 %indvars.iv, %0 ; i * 1024 + k, the index into A
+  %arrayidx16 = getelementptr inbounds float* %A, i64 %4
+  %5 = load float* %arrayidx16, align 4, !tbaa !0
+  %6 = shl i64 %indvars.iv, 10
+  %7 = add nsw i64 %6, %indvars.iv88 ; k * 1024 + j, the index into B
+  %arrayidx20 = getelementptr inbounds float* %B, i64 %7
+  %8 = load float* %arrayidx20, align 4, !tbaa !0
+  %mul21 = fmul float %5, %8
+  %add26 = fadd float %3, %mul21
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body12
+
+for.end: ; preds = %for.body12
+  %arrayidx34 = getelementptr inbounds float* %C, i64 %2
+  %9 = load float* %arrayidx34, align 4, !tbaa !0
+  %sub.i = fsub float %add26, %9
+  %fabsf.i = tail call float @fabsf(float %sub.i) #6
+  %10 = fpext float %fabsf.i to double
+  %cmp.i = fcmp olt double %10, 1.000000e-03 ; same |difference| < 1e-3 test as in @isEqual
+  %indvars.iv.next89 = add i64 %indvars.iv88, 1
+  br i1 %cmp.i, label %for.cond4, label %if.then
+
+if.then: ; preds = %for.end
+  %conv40 = fpext float %9 to double
+  %conv45 = fpext float %add26 to double
+  %call46 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @.str, i64 0, i64 0), i32 %i.081, i32 %j.079, double %conv40, double %conv45) #5
+  br label %return
+
+for.inc50: ; preds = %for.cond4
+  %indvars.iv.next93 = add i64 %indvars.iv92, 1
+  %inc51 = add nsw i32 %i.081, 1
+  %11 = trunc i64 %indvars.iv.next93 to i32
+  %cmp = icmp slt i32 %11, 1024
+  br i1 %cmp, label %for.cond4.preheader, label %return
+
+return: ; preds = %for.inc50, %if.then
+  %retval.0 = phi i32 [ 0, %if.then ], [ 1, %for.inc50 ] ; 0 = mismatch reported, 1 = every element matched
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64) #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture, ...) #1
+
+; --------------- VISC Intrinsics ---------------
+; Return type of the VISC matrix multiplication nodes (an empty struct)
+%rtype = type {}
+%struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, i32, i32, %rtype }> ; packed launch-argument struct: the 11 arguments of @MatrixMulRoot followed by an %rtype result field
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.launch(i8*, i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.visc.wait(i8*) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createNode(i8*) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createNode1D(i8*, i32) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.getNode() #0
+
+; Function Attrs: nounwind
+declare i8* @llvm.visc.getParentNode(i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @llvm.visc.getNumDims(i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @llvm.visc.getNodeInstanceID.x(i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @llvm.visc.getNodeInstanceID.y(i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0
+
+; Function Attrs: none (no attribute group is attached to this declaration)
+declare void @llvm.visc.bind.input(i8*, i32, i32)
+
+; Function Attrs: none (no attribute group is attached to this declaration)
+declare void @llvm.visc.bind.output(i8*, i32, i32)
+; ----------------- VISC intrinsics end ------------------
+
+; Function Attrs: nounwind uwtable
+define %rtype @matrixMul(float* in nocapture %A, i64 %bytes_A, float* in nocapture %B, i64 %bytes_B, float* out %C, i64 %bytes_C, i32 %k, i32 %n, i32 %m) #0 { ; leaf node: C[y*n + x] = sum over i < k of A[y*k + i] * B[i*n + x], with x = %call1 and y = %call2; %m and the %bytes_* arguments are not read here
+entry:
+  ;%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
+
+  ; ------------------------- VISC changes ------------------
+  ; Replace get_global_id calls: per dimension, global id = parent node's instance ID * getNumNodeInstances of this node + this node's instance ID
+  ; Replaced statement --
+  ; -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #5
+  ; -- %call2 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #5
+  %this_node = call i8* @llvm.visc.getNode()
+  %Lx = call i32 @llvm.visc.getNodeInstanceID.x(i8* %this_node)
+  %Ly = call i32 @llvm.visc.getNodeInstanceID.y(i8* %this_node)
+  %LLimitx = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
+  %LLimity = call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node)
+
+  %parent_node = call i8* @llvm.visc.getParentNode(i8* %this_node)
+  %Gx = call i32 @llvm.visc.getNodeInstanceID.x(i8* %parent_node)
+  %Gy = call i32 @llvm.visc.getNodeInstanceID.y(i8* %parent_node)
+
+  %tmpx = mul i32 %Gx, %LLimitx
+  %tmpy = mul i32 %Gy, %LLimity
+
+  %call1 = add i32 %tmpx, %Lx ; global x index = Gx * LLimitx + Lx
+  %call2 = add i32 %tmpy, %Ly ; global y index = Gy * LLimity + Ly
+
+  ;%printcall1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %call1) #5
+  ;%printcall2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %call2) #5
+
+  ; ---------------------- VISC changes End ------------------
+
+  ;%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
+  %cmp44 = icmp eq i32 %k, 0
+  br i1 %cmp44, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+  %mul = mul i32 %call2, %k
+  br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %res.046 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add14, %for.body ]
+  %0 = trunc i64 %indvars.iv to i32
+  %add = add i32 %0, %mul ; y * k + i, the index into A
+  %mul4 = mul i32 %0, %n
+  %add5 = add i32 %mul4, %call1 ; i * n + x, the index into B
+  ;%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds float* %A, i64 %idxprom
+  %1 = load float* %arrayidx, align 4, !tbaa !0
+  %idxprom11 = zext i32 %add5 to i64
+  %arrayidx12 = getelementptr inbounds float* %B, i64 %idxprom11
+  %2 = load float* %arrayidx12, align 4, !tbaa !0
+  %mul13 = fmul float %1, %2
+  %add14 = fadd float %res.046, %mul13
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %k
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add14, %for.body ] ; 0.0 when k == 0 (loop skipped)
+  ;%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
+  %mul16 = mul i32 %call2, %n
+  %add17 = add i32 %mul16, %call1 ; y * n + x, the index into C
+  %idxprom18 = zext i32 %add17 to i64
+  %arrayidx19 = getelementptr inbounds float* %C, i64 %idxprom18
+  store float %res.0.lcssa, float* %arrayidx19, align 4, !tbaa !0
+  ;%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
+  ;%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
+  ret %rtype undef
+}
+
+; ----------------- VISC SGEMM internal node: creates a blocksize x blocksize 2D node of @matrixMul ----------------
+define %rtype @MatrixMulInternal(float* in %h_A, i64 %bytes_A, float* in %h_B, i64 %bytes_B, float* out %h_C, i64 %bytes_C, i32 %WA, i32 %WB, i32 %HA, i32 %blocksize) {
+  %kernel = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32, i32, i32)* @matrixMul to i8*), i32 %blocksize, i32 %blocksize)
+  ; Bind Inputs: arguments 0-8 of this function go to arguments 0-8 of @matrixMul (operand order presumably: node, source index, destination index -- TODO confirm against the VISC intrinsic definitions)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); h_A
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); bytes_A
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); h_B
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_B
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); h_C
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_C
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); WA = HB = k
+  call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); WB = WC = n
+  call void @llvm.visc.bind.input(i8* %kernel, i32 8, i32 8); HA = HC = m
+  ; Bind Outputs (none)
+  ret %rtype undef
+}
+
+; ----------------- VISC SGEMM root node: creates a gridsize x gridsize 2D node of @MatrixMulInternal ----------------
+define %rtype @MatrixMulRoot(float* in %h_A, i64 %bytes_A, float* in %h_B, i64 %bytes_B, float* out %h_C, i64 %bytes_C, i32 %WA, i32 %WB, i32 %HA, i32 %gridsize, i32 %blocksize) {
+  %kernel = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32)* @MatrixMulInternal to i8*),i32 %gridsize, i32 %gridsize)
+  ; Bind Inputs: arguments 0-8 keep their positions; %gridsize (argument 9) is not bound
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); h_A
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); bytes_A
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); h_B
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_B
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); h_C
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_C
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); WA = HB = k
+  call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); WB = WC = n
+  call void @llvm.visc.bind.input(i8* %kernel, i32 8, i32 8); HA = HC = m
+  call void @llvm.visc.bind.input(i8* %kernel, i32 10, i32 9); blocksize (argument 10 of this function, argument 9 of @MatrixMulInternal)
+  ; Bind Outputs (none)
+  ret %rtype undef
+}
+
+; Function Attrs: noinline nounwind uwtable
+;define %rtype @computeMatrixMul(float* nocapture %h_A, i64 %bytes_A, float* nocapture %h_B, i64 %bytes_B, float* %h_C, i64 %bytes_C, i32 %k, i32 %n, i32 %m) #3 {
+;entry:
+;  %cmp18 = icmp eq i32 %m, 0
+;  %cmp215 = icmp eq i32 %n, 0
+;  %or.cond = or i1 %cmp18, %cmp215
+;  br i1 %or.cond, label %for.end6, label %for.body3.lr.ph.us
+;
+;for.inc4.us: ; preds = %for.body3.us
+;  %0 = extractvalue %rtype %call.us, 0
+;  %1 = extractvalue %rtype %call.us, 1
+;  %inc5.us = add i32 %i.019.us, 1
+;  %exitcond24 = icmp eq i32 %inc5.us, %m
+;  br i1 %exitcond24, label %for.end6, label %for.body3.lr.ph.us
+;
+;for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
+;  %j.016.us = phi i32 [ 0, %for.body3.lr.ph.us ], [ %inc.us, %for.body3.us ]
+;  %call.us = tail call %rtype @matrixMul(float* %h_A, i64 undef, float* %h_B, i64 undef, float* %h_C, i64 %bytes_C, i32 %k, i32 %n, i32 undef, i32 undef, i32 undef)
+;  %inc.us = add i32 %j.016.us, 1
+;  %exitcond = icmp eq i32 %inc.us, %n
+;  br i1 %exitcond, label %for.inc4.us, label %for.body3.us
+;
+;for.body3.lr.ph.us: ; preds = %entry, %for.inc4.us
+;  %i.019.us = phi i32 [ %inc5.us, %for.inc4.us ], [ 0, %entry ]
+;  br label %for.body3.us
+;
+;for.end6: ; preds = %for.inc4.us, %entry
+;  %Out.sroa.1.0.lcssa = phi i32 [ undef, %entry ], [ %1, %for.inc4.us ]
+;  %Out.sroa.0.0.lcssa = phi float* [ undef, %entry ], [ %0, %for.inc4.us ]
+;  %.fca.0.insert = insertvalue %rtype undef, float* %Out.sroa.0.0.lcssa, 0
+;  %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %Out.sroa.1.0.lcssa, 1
+;  ret %rtype %.fca.1.insert
+;}
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture %argv) #0 { ; allocates three 4194304-byte buffers (1024 * 1024 floats each), fills the first two with random values, launches the VISC graph rooted at @MatrixMulRoot, then validates the third with @checkResults; always returns 0
+entry:
+  tail call void @srand(i32 2006) #5
+  %call = tail call noalias i8* @malloc(i64 4194304) #5
+  %0 = bitcast i8* %call to float*
+  %call7 = tail call noalias i8* @malloc(i64 4194304) #5
+  br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %entry
+  %indvars.iv.i = phi i64 [ %indvars.iv.next.i, %for.body.i ], [ 0, %entry ]
+  %call.i = tail call i32 @rand() #5
+  %conv.i = sitofp i32 %call.i to float
+  %div.i = fmul float %conv.i, 0x3E00000000000000
+  %arrayidx.i = getelementptr inbounds float* %0, i64 %indvars.iv.i
+  store float %div.i, float* %arrayidx.i, align 4, !tbaa !0
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %lftr.wideiv42 = trunc i64 %indvars.iv.next.i to i32
+  %exitcond43 = icmp eq i32 %lftr.wideiv42, 1048576
+  br i1 %exitcond43, label %for.body.i40.preheader, label %for.body.i
+
+for.body.i40.preheader: ; preds = %for.body.i
+  %1 = bitcast i8* %call7 to float*
+  br label %for.body.i40
+
+for.body.i40: ; preds = %for.body.i40.preheader, %for.body.i40
+  %indvars.iv.i32 = phi i64 [ %indvars.iv.next.i37, %for.body.i40 ], [ 0, %for.body.i40.preheader ]
+  %call.i33 = tail call i32 @rand() #5
+  %conv.i34 = sitofp i32 %call.i33 to float
+  %div.i35 = fmul float %conv.i34, 0x3E00000000000000
+  %arrayidx.i36 = getelementptr inbounds float* %1, i64 %indvars.iv.i32
+  store float %div.i35, float* %arrayidx.i36, align 4, !tbaa !0
+  %indvars.iv.next.i37 = add i64 %indvars.iv.i32, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next.i37 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1048576
+  br i1 %exitcond, label %randomInit.exit41, label %for.body.i40
+
+randomInit.exit41: ; preds = %for.body.i40
+  %call12 = tail call noalias i8* @malloc(i64 4194304) #5
+  %2 = bitcast i8* %call12 to float*
+
+  ; ---------------------------------- Adding VISC Launch Call --------------------------------
+  ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
+  ; Setting up launch input args
+  %in.addr = alloca %struct.arg
+
+  ; Store arguments
+  %in.addr.h_A = getelementptr %struct.arg* %in.addr, i32 0, i32 0
+  %in.addr.bytes_A = getelementptr %struct.arg* %in.addr, i32 0, i32 1
+  %in.addr.h_B = getelementptr %struct.arg* %in.addr, i32 0, i32 2
+  %in.addr.bytes_B = getelementptr %struct.arg* %in.addr, i32 0, i32 3
+  %in.addr.h_C = getelementptr %struct.arg* %in.addr, i32 0, i32 4
+  %in.addr.bytes_C = getelementptr %struct.arg* %in.addr, i32 0, i32 5
+  %in.addr.WA = getelementptr %struct.arg* %in.addr, i32 0, i32 6
+  %in.addr.WB = getelementptr %struct.arg* %in.addr, i32 0, i32 7
+  %in.addr.HA = getelementptr %struct.arg* %in.addr, i32 0, i32 8
+  %in.addr.gridsize = getelementptr %struct.arg* %in.addr, i32 0, i32 9
+  %in.addr.blocksize = getelementptr %struct.arg* %in.addr, i32 0, i32 10
+
+  store float* %0, float** %in.addr.h_A
+  store i64 4194304, i64* %in.addr.bytes_A
+  store float* %1, float** %in.addr.h_B
+  store i64 4194304, i64* %in.addr.bytes_B
+  store float* %2, float** %in.addr.h_C
+  store i64 4194304, i64* %in.addr.bytes_C
+  store i32 1024, i32* %in.addr.WA
+  store i32 1024, i32* %in.addr.WB
+  store i32 1024, i32* %in.addr.HA
+  store i32 64, i32* %in.addr.gridsize ; gridsize * blocksize = 64 * 16 = 1024, the matrix dimension stored above
+  store i32 16, i32* %in.addr.blocksize
+
+  ; Change type to i8* and VISC Launch call
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32, i32, i32, i32, i32)* @MatrixMulRoot to i8*), i8* %args)
+
+  ; Wait for result
+  call void @llvm.visc.wait(i8* %graphID)
+
+  ; Get the result
+  %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 11
+  %out = load %rtype* %out.addr ; %rtype is an empty struct and %out is not used afterwards
+  ; -------------------------------- Completed VISC Launch Call --------------------------------
+
+  %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
+  %tobool = icmp eq i32 %call14, 0 ; @checkResults returns 0 on a mismatch
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %randomInit.exit41
+  %puts31 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str15, i64 0, i64 0))
+  br label %if.end
+
+if.else: ; preds = %randomInit.exit41
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @str13, i64 0, i64 0))
+  br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+  %puts30 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str14, i64 0, i64 0))
+  tail call void @free(i8* %call) #5
+  tail call void @free(i8* %call7) #5
+  tail call void @free(i8* %call12) #5
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @srand(i32) #1
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) #1
+
+declare float @fabsf(float)
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture) #5
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind }
+attributes #6 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}