From a5ac493b473dd20419e72c0d6a08674d20cb534b Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Sun, 19 Oct 2014 18:24:00 +0000 Subject: [PATCH] Preparing test case for GPU --- llvm/test/VISC/gemm_opencl/matrixMul/Makefile | 2 +- .../VISC/gemm_opencl/matrixMul/gemm_opencl.c | 116 +-- .../gemm_opencl/matrixMul/visc_gemm_opencl.ll | 719 ++++++++++++++++++ 3 files changed, 781 insertions(+), 56 deletions(-) create mode 100644 llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll diff --git a/llvm/test/VISC/gemm_opencl/matrixMul/Makefile b/llvm/test/VISC/gemm_opencl/matrixMul/Makefile index 8426617b18..6e4d4a0521 100644 --- a/llvm/test/VISC/gemm_opencl/matrixMul/Makefile +++ b/llvm/test/VISC/gemm_opencl/matrixMul/Makefile @@ -27,4 +27,4 @@ $(HOST:%=%.bin):%.bin:%.c $(LLVM_CC) -O3 -lOpenCL -I /usr/local/cuda/include $< -o $@ clean : - rm -f *.ll *.bc *.s *.bin + rm -f $(HOST).ll $(KERNELS).ll *.bc *.s *.bin diff --git a/llvm/test/VISC/gemm_opencl/matrixMul/gemm_opencl.c b/llvm/test/VISC/gemm_opencl/matrixMul/gemm_opencl.c index 89c535add0..a84f67e438 100644 --- a/llvm/test/VISC/gemm_opencl/matrixMul/gemm_opencl.c +++ b/llvm/test/VISC/gemm_opencl/matrixMul/gemm_opencl.c @@ -109,52 +109,9 @@ int checkResults(float* A, float* B, float* C) { return 1; // Success } -// Main -int main(int argc, char** argv) { - - // seed for rand() - srand(2006); - - // Allocate host memory for matrices A and B - unsigned int size_A = WA * HA; - unsigned int bytes_A = sizeof(float) * size_A; - float* h_A = (float*) malloc(bytes_A); - - unsigned int size_B = WB * HB; - unsigned int bytes_B = sizeof(float) * size_B; - float* h_B = (float*) malloc(bytes_B); - - // Initialize host memory - randomInit(h_A, size_A); - randomInit(h_B, size_B); - -/* - // Print A and B - printf("\n\nMatrix A\n"); - for(int i = 0; i < size_A; i++) - { - printf("%f ", h_A[i]); - if(((i + 1) % WA) == 0) - printf("\n"); - } - - printf("\n\nMatrix B\n"); - for(int i = 0; i < size_B; i++) - 
{ - printf("%f ", h_B[i]); - if(((i + 1) % WB) == 0) - printf("\n"); - } -*/ - - // Allocate host memory for the result matrix C - unsigned int size_C = WC * HC; - unsigned int bytes_C = sizeof(float) * size_C; - float* h_C = (float*) malloc(bytes_C); - - // Initialize OpenCL - - // OpenCL specific variables +// GPU Computation of MatrixMul +void computeMatrixMul(float* h_A, unsigned bytes_A, float* h_B, unsigned bytes_B, float* h_C, unsigned bytes_C) { + // OpenCL specific variables cl_context clGPUContext; cl_command_queue clCommandQue; cl_program clProgram; @@ -291,16 +248,7 @@ int main(int argc, char** argv) { } printf("\n"); */ - if(checkResults(h_A, h_B, h_C)) - printf("\nPass!\n"); - else - printf("\nFailed!\n"); - printf("\nDone!\n"); - // Deallocate memory - free(h_A); - free(h_B); - free(h_C); clReleaseMemObject(d_A); clReleaseMemObject(d_C); @@ -315,3 +263,61 @@ int main(int argc, char** argv) { } +// Main +int main(int argc, char** argv) { + + // seed for rand() + srand(2006); + + // Allocate host memory for matrices A and B + unsigned int size_A = WA * HA; + unsigned int bytes_A = sizeof(float) * size_A; + float* h_A = (float*) malloc(bytes_A); + + unsigned int size_B = WB * HB; + unsigned int bytes_B = sizeof(float) * size_B; + float* h_B = (float*) malloc(bytes_B); + + // Initialize host memory + randomInit(h_A, size_A); + randomInit(h_B, size_B); + +/* + // Print A and B + printf("\n\nMatrix A\n"); + for(int i = 0; i < size_A; i++) + { + printf("%f ", h_A[i]); + if(((i + 1) % WA) == 0) + printf("\n"); + } + + printf("\n\nMatrix B\n"); + for(int i = 0; i < size_B; i++) + { + printf("%f ", h_B[i]); + if(((i + 1) % WB) == 0) + printf("\n"); + } +*/ + + // Allocate host memory for the result matrix C + unsigned int size_C = WC * HC; + unsigned int bytes_C = sizeof(float) * size_C; + float* h_C = (float*) malloc(bytes_C); + + // Compute using OpenCL + computeMatrixMul(h_A, bytes_A, h_B, bytes_B, h_C, bytes_C); + + if(checkResults(h_A, h_B, h_C)) + 
printf("\nPass!\n"); + else + printf("\nFailed!\n"); + printf("\nDone!\n"); + + // Deallocate memory + free(h_A); + free(h_B); + free(h_C); +} + diff --git a/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll new file mode 100644 index 0000000000..da7bd5ec6a --- /dev/null +++ b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll @@ -0,0 +1,719 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s +; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll +; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin +; RUN: %t.bin 5 +; ModuleID = 'gemm_opencl.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +%struct._cl_mem = type opaque +%struct._cl_platform_id = type opaque +%struct._cl_context = type opaque +%struct._cl_device_id = type opaque +%struct._cl_command_queue = type opaque +%struct._cl_program = type opaque +%struct._cl_kernel = type opaque +%struct._cl_event = type opaque + +; Return Type of VISC Compute Matrix Mul +%rtype = type {float*, i32} +%struct.arg = type { float*, i32, float*, i32, %rtype } + +; Function Attrs: nounwind +declare i8* @llvm.visc.launch(i8*, i8*) #0 + +; Function Attrs: nounwind +declare void @llvm.visc.wait(i8*) #0 + +@.str = private unnamed_addr constant [3 x i8] c"rb\00", align 1 +@.str1 = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1 +@.str2 = private 
unnamed_addr constant [35 x i8] c"Failure to get number of platforms\00", align 1 +@.str3 = private unnamed_addr constant [28 x i8] c"Failure to get platform IDs\00", align 1 +@.str4 = private unnamed_addr constant [12 x i8] c" -- %d --\0A\00", align 1 +@.str5 = private unnamed_addr constant [16 x i8] c" PROFILE = %s\0A\00", align 1 +@.str6 = private unnamed_addr constant [16 x i8] c" VERSION = %s\0A\00", align 1 +@.str7 = private unnamed_addr constant [13 x i8] c" NAME = %s\0A\00", align 1 +@.str8 = private unnamed_addr constant [15 x i8] c" VENDOR = %s\0A\00", align 1 +@.str9 = private unnamed_addr constant [19 x i8] c" EXTENSIONS = %s\0A\00", align 1 +@.str10 = private unnamed_addr constant [30 x i8] c"Failure to create GPU context\00", align 1 +@.str11 = private unnamed_addr constant [28 x i8] c"Failure to get context info\00", align 1 +@.str12 = private unnamed_addr constant [32 x i8] c"Failure to create command queue\00", align 1 +@.str13 = private unnamed_addr constant [18 x i8] c"matrixMul.nvptx.s\00", align 1 +@.str14 = private unnamed_addr constant [31 x i8] c"Failure to load Program Binary\00", align 1 +@.str15 = private unnamed_addr constant [38 x i8] c"Failure to create program from binary\00", align 1 +@.str16 = private unnamed_addr constant [25 x i8] c"Failure to build program\00", align 1 +@.str17 = private unnamed_addr constant [10 x i8] c"matrixMul\00", align 1 +@.str18 = private unnamed_addr constant [25 x i8] c"Failure to create kernel\00", align 1 +@.str19 = private unnamed_addr constant [32 x i8] c"Failure to set kernel arguments\00", align 1 +@.str20 = private unnamed_addr constant [26 x i8] c"Failure to enqueue kernel\00", align 1 +@.str21 = private unnamed_addr constant [23 x i8] c"Failure to read buffer\00", align 1 +@stderr = external global %struct._IO_FILE* +@.str25 = private unnamed_addr constant [11 x i8] c"ERROR: %s\0A\00", align 1 +@str = private unnamed_addr constant [9 x i8] c"\0AFailed!\00" +@str26 = private unnamed_addr constant 
[7 x i8] c"\0ADone!\00" +@str27 = private unnamed_addr constant [7 x i8] c"\0APass!\00" + +; Function Attrs: nounwind uwtable +define void @randomInit(float* nocapture %data, i32 %size) #0 { +entry: + %cmp3 = icmp sgt i32 %size, 0 + br i1 %cmp3, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %call = tail call i32 @rand() #4 + %conv = sitofp i32 %call to float + %div = fmul float %conv, 0x3E00000000000000 + %arrayidx = getelementptr inbounds float* %data, i64 %indvars.iv + store float %div, float* %arrayidx, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Function Attrs: nounwind +declare i32 @rand() #1 + +; Function Attrs: nounwind uwtable +define noalias i8* @LoadProgSource(i8* nocapture %cFilename, i64* %szFinalLength) #0 { +entry: + %call = tail call %struct._IO_FILE* @fopen(i8* %cFilename, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0)) #4 + %cmp = icmp eq %struct._IO_FILE* %call, null + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %call1 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 2) #4 + %call2 = tail call i64 @ftell(%struct._IO_FILE* %call) #4 + %call3 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 0) #4 + %add = add i64 %call2, 1 + %call4 = tail call noalias i8* @malloc(i64 %add) #4 + %call5 = tail call i64 @fread(i8* %call4, i64 %call2, i64 1, %struct._IO_FILE* %call) #4 + %cmp6 = icmp eq i64 %call5, 1 + %call8 = tail call i32 @fclose(%struct._IO_FILE* %call) #4 + br i1 %cmp6, label %if.end9, label %if.then7 + +if.then7: ; preds = %if.end + tail call void @free(i8* %call4) #4 + br label %return + +if.end9: ; preds = %if.end + %cmp11 = icmp eq i64* %szFinalLength, null + br i1 
%cmp11, label %if.end13, label %if.then12 + +if.then12: ; preds = %if.end9 + store i64 %call2, i64* %szFinalLength, align 8, !tbaa !3 + br label %if.end13 + +if.end13: ; preds = %if.end9, %if.then12 + %arrayidx = getelementptr inbounds i8* %call4, i64 %call2 + store i8 0, i8* %arrayidx, align 1, !tbaa !1 + br label %return + +return: ; preds = %entry, %if.end13, %if.then7 + %retval.0 = phi i8* [ null, %if.then7 ], [ %call4, %if.end13 ], [ null, %entry ] + ret i8* %retval.0 +} + +; Function Attrs: nounwind +declare noalias %struct._IO_FILE* @fopen(i8* nocapture, i8* nocapture) #1 + +; Function Attrs: nounwind +declare i32 @fseek(%struct._IO_FILE* nocapture, i64, i32) #1 + +; Function Attrs: nounwind +declare i64 @ftell(%struct._IO_FILE* nocapture) #1 + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) #1 + +; Function Attrs: nounwind +declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 + +; Function Attrs: nounwind +declare i32 @fclose(%struct._IO_FILE* nocapture) #1 + +; Function Attrs: nounwind +declare void @free(i8* nocapture) #1 + +; Function Attrs: nounwind readnone uwtable +define i32 @isEqual(float %a, float %b) #2 { +entry: + %sub = fsub float %a, %b + %fabsf = tail call float @fabsf(float %sub) #6 + %0 = fpext float %fabsf to double + %cmp = fcmp olt double %0, 1.000000e-03 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + +; Function Attrs: nounwind uwtable +define i32 @checkResults(float* nocapture %A, float* nocapture %B, float* nocapture %C) #0 { +entry: + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %entry, %for.inc50 + %indvars.iv92 = phi i64 [ 0, %entry ], [ %indvars.iv.next93, %for.inc50 ] + %i.081 = phi i32 [ 0, %entry ], [ %inc51, %for.inc50 ] + %0 = shl nsw i64 %indvars.iv92, 10 + br label %for.body7 + +for.cond4: ; preds = %for.end + %inc48 = add nsw i32 %j.079, 1 + %1 = trunc i64 %indvars.iv.next89 to i32 + %cmp5 = icmp slt i32 %1, 1024 + br i1 %cmp5, label %for.body7, label %for.inc50 
+ +for.body7: ; preds = %for.cond4.preheader, %for.cond4 + %indvars.iv88 = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next89, %for.cond4 ] + %j.079 = phi i32 [ 0, %for.cond4.preheader ], [ %inc48, %for.cond4 ] + %2 = add nsw i64 %indvars.iv88, %0 + br label %for.body12 + +for.body12: ; preds = %for.body12, %for.body7 + %indvars.iv = phi i64 [ 0, %for.body7 ], [ %indvars.iv.next, %for.body12 ] + %3 = phi float [ 0.000000e+00, %for.body7 ], [ %add26, %for.body12 ] + %4 = add nsw i64 %indvars.iv, %0 + %arrayidx16 = getelementptr inbounds float* %A, i64 %4 + %5 = load float* %arrayidx16, align 4, !tbaa !0 + %6 = shl i64 %indvars.iv, 10 + %7 = add nsw i64 %6, %indvars.iv88 + %arrayidx20 = getelementptr inbounds float* %B, i64 %7 + %8 = load float* %arrayidx20, align 4, !tbaa !0 + %mul21 = fmul float %5, %8 + %add26 = fadd float %3, %mul21 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body12 + +for.end: ; preds = %for.body12 + %arrayidx34 = getelementptr inbounds float* %C, i64 %2 + %9 = load float* %arrayidx34, align 4, !tbaa !0 + %sub.i = fsub float %add26, %9 + %fabsf.i = tail call float @fabsf(float %sub.i) #6 + %10 = fpext float %fabsf.i to double + %cmp.i = fcmp olt double %10, 1.000000e-03 + %indvars.iv.next89 = add i64 %indvars.iv88, 1 + br i1 %cmp.i, label %for.cond4, label %if.then + +if.then: ; preds = %for.end + %conv40 = fpext float %9 to double + %conv45 = fpext float %add26 to double + %call46 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @.str1, i64 0, i64 0), i32 %i.081, i32 %j.079, double %conv40, double %conv45) #4 + br label %return + +for.inc50: ; preds = %for.cond4 + %indvars.iv.next93 = add i64 %indvars.iv92, 1 + %inc51 = add nsw i32 %i.081, 1 + %11 = trunc i64 %indvars.iv.next93 to i32 + %cmp = icmp slt i32 %11, 1024 + br i1 %cmp, label %for.cond4.preheader, label %return + 
+return: ; preds = %for.inc50, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %for.inc50 ] + ret i32 %retval.0 +} + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture, ...) #1 + +; Function Attrs: nounwind uwtable +define void @computeMatrixMul(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C) #0 { +entry: + %dataBytes = alloca i64, align 8 + %errcode = alloca i32, align 4 + %d_A = alloca %struct._cl_mem*, align 8 + %d_B = alloca %struct._cl_mem*, align 8 + %d_C = alloca %struct._cl_mem*, align 8 + %numPlatforms = alloca i32, align 4 + %buffer = alloca [10240 x i8], align 16 + %properties = alloca [3 x i64], align 16 + %binaryLength = alloca i64, align 8 + %clMatrixMul = alloca i8*, align 8 + %binaryStatus = alloca i32, align 4 + %localWorkSize = alloca [2 x i64], align 16 + %globalWorkSize = alloca [2 x i64], align 16 + %wA = alloca i32, align 4 + %wC = alloca i32, align 4 + %call = call i32 @clGetPlatformIDs(i32 0, %struct._cl_platform_id** null, i32* %numPlatforms) #4 + store i32 %call, i32* %errcode, align 4, !tbaa !4 + %cmp.i = icmp eq i32 %call, 0 + br i1 %cmp.i, label %checkErr.exit, label %if.then.i + +if.then.i: ; preds = %entry + %0 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit: ; preds = %entry + %1 = load i32* %numPlatforms, align 4, !tbaa !4 + %2 = zext i32 %1 to i64 + %vla = alloca %struct._cl_platform_id*, i64 %2, align 16 + %call1 = call i32 @clGetPlatformIDs(i32 %1, %struct._cl_platform_id** %vla, i32* null) #4 + store i32 %call1, i32* %errcode, align 4, !tbaa !4 + %cmp.i105 = icmp eq i32 %call1, 0 + br i1 %cmp.i105, label %for.cond.preheader, label %if.then.i107 + +for.cond.preheader: ; preds = %checkErr.exit + %3 = load i32* 
%numPlatforms, align 4, !tbaa !4 + %cmp148 = icmp eq i32 %3, 0 + br i1 %cmp148, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %for.cond.preheader + %4 = getelementptr inbounds [10240 x i8]* %buffer, i64 0, i64 0 + br label %for.body + +if.then.i107: ; preds = %checkErr.exit + %5 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i106 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str3, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.0149 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + call void @llvm.lifetime.start(i64 10240, i8* %4) #4 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str4, i64 0, i64 0), i32 %i.0149) #4 + %idxprom = zext i32 %i.0149 to i64 + %arrayidx = getelementptr inbounds %struct._cl_platform_id** %vla, i64 %idxprom + %6 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 + %call3 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %6, i32 2304, i64 10240, i8* %4, i64* null) #4 + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str5, i64 0, i64 0), i8* %4) #4 + %7 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 + %call9 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %7, i32 2305, i64 10240, i8* %4, i64* null) #4 + %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str6, i64 0, i64 0), i8* %4) #4 + %8 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 + %call15 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %8, i32 2306, i64 10240, i8* %4, i64* null) #4 + %call17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str7, i64 0, i64 0), i8* %4) #4 + %9 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 + %call21 = call i32 
@clGetPlatformInfo(%struct._cl_platform_id* %9, i32 2307, i64 10240, i8* %4, i64* null) #4 + %call23 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str8, i64 0, i64 0), i8* %4) #4 + %10 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 + %call27 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %10, i32 2308, i64 10240, i8* %4, i64* null) #4 + %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([19 x i8]* @.str9, i64 0, i64 0), i8* %4) #4 + call void @llvm.lifetime.end(i64 10240, i8* %4) #4 + %inc = add i32 %i.0149, 1 + %11 = load i32* %numPlatforms, align 4, !tbaa !4 + %cmp = icmp ult i32 %inc, %11 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %for.cond.preheader + %arrayinit.begin = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 0 + store i64 4228, i64* %arrayinit.begin, align 16, !tbaa !3 + %arrayinit.element = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 1 + %12 = load %struct._cl_platform_id** %vla, align 16, !tbaa !5 + %13 = ptrtoint %struct._cl_platform_id* %12 to i64 + %sext = shl i64 %13, 32 + %conv = ashr exact i64 %sext, 32 + store i64 %conv, i64* %arrayinit.element, align 8, !tbaa !3 + %arrayinit.element31 = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 2 + store i64 0, i64* %arrayinit.element31, align 16, !tbaa !3 + %call33 = call %struct._cl_context* @clCreateContextFromType(i64* %arrayinit.begin, i64 4, void (i8*, i8*, i64, i8*)* null, i8* null, i32* %errcode) #4 + %14 = load i32* %errcode, align 4, !tbaa !4 + %cmp.i109 = icmp eq i32 %14, 0 + br i1 %cmp.i109, label %checkErr.exit112, label %if.then.i111 + +if.then.i111: ; preds = %for.end + %15 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i110 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %15, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8]* @.str10, i64 0, i64 0)) #4 + call void 
@exit(i32 1) #7 + unreachable + +checkErr.exit112: ; preds = %for.end + %call34 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 0, i8* null, i64* %dataBytes) #4 + store i32 %call34, i32* %errcode, align 4, !tbaa !4 + %16 = load i64* %dataBytes, align 8, !tbaa !3 + %call35 = call noalias i8* @malloc(i64 %16) #4 + %17 = bitcast i8* %call35 to %struct._cl_device_id** + %call36 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 %16, i8* %call35, i64* null) #4 + %18 = load i32* %errcode, align 4, !tbaa !4 + %or = or i32 %18, %call36 + store i32 %or, i32* %errcode, align 4, !tbaa !4 + %cmp.i113 = icmp eq i32 %or, 0 + br i1 %cmp.i113, label %checkErr.exit116, label %if.then.i115 + +if.then.i115: ; preds = %checkErr.exit112 + %19 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i114 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %19, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str11, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit116: ; preds = %checkErr.exit112 + %20 = load %struct._cl_device_id** %17, align 8, !tbaa !5 + %call38 = call %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context* %call33, %struct._cl_device_id* %20, i64 0, i32* %errcode) #4 + %21 = load i32* %errcode, align 4, !tbaa !4 + %cmp.i117 = icmp eq i32 %21, 0 + br i1 %cmp.i117, label %checkErr.exit120, label %if.then.i119 + +if.then.i119: ; preds = %checkErr.exit116 + %22 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i118 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %22, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str12, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit120: ; preds = %checkErr.exit116 + %conv39 = zext i32 %bytes_C to i64 + %call40 = call %struct._cl_mem* 
@clCreateBuffer(%struct._cl_context* %call33, i64 1, i64 %conv39, i8* null, i32* %errcode) #4 + store %struct._cl_mem* %call40, %struct._cl_mem** %d_C, align 8, !tbaa !5 + %conv41 = zext i32 %bytes_A to i64 + %23 = bitcast float* %h_A to i8* + %call42 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv41, i8* %23, i32* %errcode) #4 + store %struct._cl_mem* %call42, %struct._cl_mem** %d_A, align 8, !tbaa !5 + %conv43 = zext i32 %bytes_B to i64 + %24 = bitcast float* %h_B to i8* + %call44 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv43, i8* %24, i32* %errcode) #4 + store %struct._cl_mem* %call44, %struct._cl_mem** %d_B, align 8, !tbaa !5 + %call45 = call i8* @LoadProgSource(i8* getelementptr inbounds ([18 x i8]* @.str13, i64 0, i64 0), i64* %binaryLength) + store i8* %call45, i8** %clMatrixMul, align 8, !tbaa !5 + %cmp46 = icmp eq i8* %call45, null + br i1 %cmp46, label %if.then.i122, label %checkErr.exit123 + +if.then.i122: ; preds = %checkErr.exit120 + %25 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i121 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %25, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str14, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit123: ; preds = %checkErr.exit120 + %call49 = call %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context* %call33, i32 1, %struct._cl_device_id** %17, i64* %binaryLength, i8** %clMatrixMul, i32* %binaryStatus, i32* %errcode) #4 + %26 = load i32* %errcode, align 4, !tbaa !4 + %cmp.i124 = icmp eq i32 %26, 0 + br i1 %cmp.i124, label %checkErr.exit127, label %if.then.i126 + +if.then.i126: ; preds = %checkErr.exit123 + %27 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i125 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %27, i8* getelementptr inbounds ([11 x i8]* @.str25, 
i64 0, i64 0), i8* getelementptr inbounds ([38 x i8]* @.str15, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit127: ; preds = %checkErr.exit123 + %call50 = call i32 @clBuildProgram(%struct._cl_program* %call49, i32 0, %struct._cl_device_id** null, i8* null, void (%struct._cl_program*, i8*)* null, i8* null) #4 + store i32 %call50, i32* %errcode, align 4, !tbaa !4 + %cmp.i128 = icmp eq i32 %call50, 0 + br i1 %cmp.i128, label %checkErr.exit131, label %if.then.i130 + +if.then.i130: ; preds = %checkErr.exit127 + %28 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i129 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %28, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str16, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit131: ; preds = %checkErr.exit127 + %call51 = call %struct._cl_kernel* @clCreateKernel(%struct._cl_program* %call49, i8* getelementptr inbounds ([10 x i8]* @.str17, i64 0, i64 0), i32* %errcode) #4 + %29 = load i32* %errcode, align 4, !tbaa !4 + %cmp.i132 = icmp eq i32 %29, 0 + br i1 %cmp.i132, label %checkErr.exit135, label %if.then.i134 + +if.then.i134: ; preds = %checkErr.exit131 + %30 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i133 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %30, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str18, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit135: ; preds = %checkErr.exit131 + store i32 1024, i32* %wA, align 4, !tbaa !4 + store i32 1024, i32* %wC, align 4, !tbaa !4 + %31 = bitcast %struct._cl_mem** %d_C to i8* + %call52 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 0, i64 8, i8* %31) #4 + store i32 %call52, i32* %errcode, align 4, !tbaa !4 + %32 = bitcast %struct._cl_mem** %d_A to i8* + %call53 = call i32 
@clSetKernelArg(%struct._cl_kernel* %call51, i32 1, i64 8, i8* %32) #4 + %33 = load i32* %errcode, align 4, !tbaa !4 + %or54 = or i32 %33, %call53 + store i32 %or54, i32* %errcode, align 4, !tbaa !4 + %34 = bitcast %struct._cl_mem** %d_B to i8* + %call55 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 2, i64 8, i8* %34) #4 + %35 = load i32* %errcode, align 4, !tbaa !4 + %or56 = or i32 %35, %call55 + store i32 %or56, i32* %errcode, align 4, !tbaa !4 + %36 = bitcast i32* %wA to i8* + %call57 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 3, i64 4, i8* %36) #4 + %37 = load i32* %errcode, align 4, !tbaa !4 + %or58 = or i32 %37, %call57 + store i32 %or58, i32* %errcode, align 4, !tbaa !4 + %38 = bitcast i32* %wC to i8* + %call59 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 4, i64 4, i8* %38) #4 + %39 = load i32* %errcode, align 4, !tbaa !4 + %or60 = or i32 %39, %call59 + store i32 %or60, i32* %errcode, align 4, !tbaa !4 + %cmp.i136 = icmp eq i32 %or60, 0 + br i1 %cmp.i136, label %checkErr.exit139, label %if.then.i138 + +if.then.i138: ; preds = %checkErr.exit135 + %40 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i137 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %40, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str19, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit139: ; preds = %checkErr.exit135 + %arrayidx61 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 0 + store i64 16, i64* %arrayidx61, align 16, !tbaa !3 + %arrayidx62 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 1 + store i64 16, i64* %arrayidx62, align 8, !tbaa !3 + %arrayidx63 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 0 + store i64 1024, i64* %arrayidx63, align 16, !tbaa !3 + %arrayidx64 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 1 + store i64 1024, i64* %arrayidx64, align 
8, !tbaa !3 + %call67 = call i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue* %call38, %struct._cl_kernel* %call51, i32 2, i64* null, i64* %arrayidx63, i64* %arrayidx61, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4 + store i32 %call67, i32* %errcode, align 4, !tbaa !4 + %cmp.i140 = icmp eq i32 %call67, 0 + br i1 %cmp.i140, label %checkErr.exit143, label %if.then.i142 + +if.then.i142: ; preds = %checkErr.exit139 + %41 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i141 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %41, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([26 x i8]* @.str20, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit143: ; preds = %checkErr.exit139 + %42 = load %struct._cl_mem** %d_C, align 8, !tbaa !5 + %43 = bitcast float* %h_C to i8* + %call69 = call i32 @clEnqueueReadBuffer(%struct._cl_command_queue* %call38, %struct._cl_mem* %42, i32 1, i64 0, i64 %conv39, i8* %43, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4 + store i32 %call69, i32* %errcode, align 4, !tbaa !4 + %cmp.i144 = icmp eq i32 %call69, 0 + br i1 %cmp.i144, label %checkErr.exit147, label %if.then.i146 + +if.then.i146: ; preds = %checkErr.exit143 + %44 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call.i145 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %44, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8]* @.str21, i64 0, i64 0)) #4 + call void @exit(i32 1) #7 + unreachable + +checkErr.exit147: ; preds = %checkErr.exit143 + %45 = load %struct._cl_mem** %d_A, align 8, !tbaa !5 + %call70 = call i32 @clReleaseMemObject(%struct._cl_mem* %45) #4 + %46 = load %struct._cl_mem** %d_C, align 8, !tbaa !5 + %call71 = call i32 @clReleaseMemObject(%struct._cl_mem* %46) #4 + %47 = load %struct._cl_mem** %d_B, align 8, !tbaa !5 + %call72 = call i32 
@clReleaseMemObject(%struct._cl_mem* %47) #4 + call void @free(i8* %call35) #4 + %48 = load i8** %clMatrixMul, align 8, !tbaa !5 + call void @free(i8* %48) #4 + %call73 = call i32 @clReleaseContext(%struct._cl_context* %call33) #4 + %call74 = call i32 @clReleaseKernel(%struct._cl_kernel* %call51) #4 + %call75 = call i32 @clReleaseProgram(%struct._cl_program* %call49) #4 + %call76 = call i32 @clReleaseCommandQueue(%struct._cl_command_queue* %call38) #4 + ret void +} + +declare i32 @clGetPlatformIDs(i32, %struct._cl_platform_id**, i32*) #3 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #4 + +declare i32 @clGetPlatformInfo(%struct._cl_platform_id*, i32, i64, i8*, i64*) #3 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #4 + +declare %struct._cl_context* @clCreateContextFromType(i64*, i64, void (i8*, i8*, i64, i8*)*, i8*, i32*) #3 + +declare i32 @clGetContextInfo(%struct._cl_context*, i32, i64, i8*, i64*) #3 + +declare %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context*, %struct._cl_device_id*, i64, i32*) #3 + +declare %struct._cl_mem* @clCreateBuffer(%struct._cl_context*, i64, i64, i8*, i32*) #3 + +declare %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context*, i32, %struct._cl_device_id**, i64*, i8**, i32*, i32*) #3 + +declare i32 @clBuildProgram(%struct._cl_program*, i32, %struct._cl_device_id**, i8*, void (%struct._cl_program*, i8*)*, i8*) #3 + +declare %struct._cl_kernel* @clCreateKernel(%struct._cl_program*, i8*, i32*) #3 + +declare i32 @clSetKernelArg(%struct._cl_kernel*, i32, i64, i8*) #3 + +declare i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue*, %struct._cl_kernel*, i32, i64*, i64*, i64*, i32, %struct._cl_event**, %struct._cl_event**) #3 + +declare i32 @clEnqueueReadBuffer(%struct._cl_command_queue*, %struct._cl_mem*, i32, i64, i64, i8*, i32, %struct._cl_event**, %struct._cl_event**) #3 + +declare i32 @clReleaseMemObject(%struct._cl_mem*) #3 + 
+declare i32 @clReleaseContext(%struct._cl_context*) #3
+
+declare i32 @clReleaseKernel(%struct._cl_kernel*) #3
+
+declare i32 @clReleaseProgram(%struct._cl_program*) #3
+
+declare i32 @clReleaseCommandQueue(%struct._cl_command_queue*) #3
+
+; Host driver for the matrix-multiply test.
+;
+; Steps performed by this function:
+;   1. Seeds rand() with 2006 and allocates three 4194304-byte buffers.
+;   2. Fills the first two buffers (%0 = A, %1 = B) with 1048576 floats each,
+;      every element being rand() scaled by 2^-31.
+;   3. Packs A, B and their byte sizes into a %struct.arg, launches
+;      @MatrixMulRoot through @llvm.visc.launch and waits on the returned
+;      graph handle.
+;   4. Reads the result pointer from field 4 of the argument struct and checks
+;      every element against a host-side dot product over 1024 x 1024 indices
+;      with an absolute tolerance of 1e-3.
+;   5. Prints a status string, frees the three malloc'ed buffers, returns 0.
+;
+; %argc and %argv are not used.
+;
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture %argv) #0 {
+entry:
+  tail call void @srand(i32 2006) #4
+  %call = tail call noalias i8* @malloc(i64 4194304) #4
+  %0 = bitcast i8* %call to float*
+  %call7 = tail call noalias i8* @malloc(i64 4194304) #4
+  br label %for.body.i
+
+; Fill A: 1048576 elements, each rand() * 2^-31 (0x3E00000000000000).
+for.body.i:  ; preds = %for.body.i, %entry
+  %indvars.iv.i = phi i64 [ %indvars.iv.next.i, %for.body.i ], [ 0, %entry ]
+  %call.i = tail call i32 @rand() #4
+  %conv.i = sitofp i32 %call.i to float
+  %div.i = fmul float %conv.i, 0x3E00000000000000
+  %arrayidx.i = getelementptr inbounds float* %0, i64 %indvars.iv.i
+  store float %div.i, float* %arrayidx.i, align 4, !tbaa !0
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %lftr.wideiv59 = trunc i64 %indvars.iv.next.i to i32
+  %exitcond60 = icmp eq i32 %lftr.wideiv59, 1048576
+  br i1 %exitcond60, label %for.body.i40.preheader, label %for.body.i
+
+for.body.i40.preheader:  ; preds = %for.body.i
+  %1 = bitcast i8* %call7 to float*
+  br label %for.body.i40
+
+; Fill B the same way as A.
+for.body.i40:  ; preds = %for.body.i40.preheader, %for.body.i40
+  %indvars.iv.i32 = phi i64 [ %indvars.iv.next.i37, %for.body.i40 ], [ 0, %for.body.i40.preheader ]
+  %call.i33 = tail call i32 @rand() #4
+  %conv.i34 = sitofp i32 %call.i33 to float
+  %div.i35 = fmul float %conv.i34, 0x3E00000000000000
+  %arrayidx.i36 = getelementptr inbounds float* %1, i64 %indvars.iv.i32
+  store float %div.i35, float* %arrayidx.i36, align 4, !tbaa !0
+  %indvars.iv.next.i37 = add i64 %indvars.iv.i32, 1
+  %lftr.wideiv57 = trunc i64 %indvars.iv.next.i37 to i32
+  %exitcond58 = icmp eq i32 %lftr.wideiv57, 1048576
+  br i1 %exitcond58, label %randomInit.exit41, label %for.body.i40
+
+randomInit.exit41:  ; preds = %for.body.i40
+  ; %call12 / %2 is the host-side C buffer that the replaced
+  ; @computeMatrixMul call used to receive; it is no longer read below and is
+  ; only freed in %if.end.
+  %call12 = tail call noalias i8* @malloc(i64 4194304) #4
+  %2 = bitcast i8* %call12 to float*
+
+  ; ----- Adding VISC Launch Call -----
+  ; Setting up launch input args
+  ; NOTE(review): %struct.arg is defined outside this view; the field accesses
+  ; below assume the layout { float*, i32, float*, i32, %rtype } -- confirm.
+  %in.addr = alloca %struct.arg
+
+  ; Store arguments
+  %in.addr.h_A = getelementptr %struct.arg* %in.addr, i32 0, i32 0
+  %in.addr.bytes_A = getelementptr %struct.arg* %in.addr, i32 0, i32 1
+  %in.addr.h_B = getelementptr %struct.arg* %in.addr, i32 0, i32 2
+  %in.addr.bytes_B = getelementptr %struct.arg* %in.addr, i32 0, i32 3
+  store float* %0, float** %in.addr.h_A
+  store i32 4194304, i32* %in.addr.bytes_A
+  store float* %1, float** %in.addr.h_B
+  store i32 4194304, i32* %in.addr.bytes_B
+
+  ; Change type to i8* and VISC Launch call
+  ; The source type of the cast must be the type %in.addr was defined with
+  ; (%struct.arg*). A literal struct pointer type is a different type from an
+  ; identified struct pointer, even when the fields are the same.
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i32, float*, i32)* @MatrixMulRoot to i8*), i8* %args)
+  ;tail call void @computeMatrixMul(float* %0, i32 4194304, float* %1, i32 4194304, float* %2, i32 4194304)
+
+  ; Wait for result
+  call void @llvm.visc.wait(i8* %graphID)
+
+  ; Get the result
+  ; Field 4 of the argument struct is the %rtype result slot; element 0 is used
+  ; below as the float* result matrix, element 1 is extracted but not used.
+  ; NOTE(review): %out.h_C is never freed here -- confirm who owns that buffer.
+  %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 4
+  %out = load %rtype* %out.addr
+  %out.h_C = extractvalue %rtype %out, 0
+  ;%2 = extractvalue %rtype %out, 0
+  ; Both fields come from the value loaded above (%out).
+  %out.bytes_C = extractvalue %rtype %out, 1
+
+  ; ----- Completed VISC Launch Call Code -----
+
+  br label %for.cond4.preheader.i
+
+; Result check. Outer loop over row i (0..1023); %3 = i * 1024.
+for.cond4.preheader.i:  ; preds = %for.inc50.i, %randomInit.exit41
+  %indvars.iv92.i = phi i64 [ 0, %randomInit.exit41 ], [ %indvars.iv.next93.i, %for.inc50.i ]
+  %i.081.i = phi i32 [ 0, %randomInit.exit41 ], [ %inc51.i, %for.inc50.i ]
+  %3 = shl nsw i64 %indvars.iv92.i, 10
+  br label %for.body7.i
+
+; Latch of the column loop: advance j and continue while j < 1024.
+for.cond4.i:  ; preds = %for.end.i
+  %inc48.i = add nsw i32 %j.079.i, 1
+  %4 = trunc i64 %indvars.iv.next89.i to i32
+  %cmp5.i = icmp slt i32 %4, 1024
+  br i1 %cmp5.i, label %for.body7.i, label %for.inc50.i
+
+; Column loop header over j (0..1023).
+for.body7.i:  ; preds = %for.cond4.i, %for.cond4.preheader.i
+  %indvars.iv88.i = phi i64 [ 0, %for.cond4.preheader.i ], [ %indvars.iv.next89.i, %for.cond4.i ]
+  %j.079.i = phi i32 [ 0, %for.cond4.preheader.i ], [ %inc48.i, %for.cond4.i ]
+  br label %for.body12.i
+
+; Reference dot product: %5 accumulates A[i*1024 + k] * B[k*1024 + j]
+; for k = 0..1023.
+for.body12.i:  ; preds = %for.body12.i, %for.body7.i
+  %indvars.iv.i42 = phi i64 [ 0, %for.body7.i ], [ %indvars.iv.next.i43, %for.body12.i ]
+  %5 = phi float [ 0.000000e+00, %for.body7.i ], [ %add26.i, %for.body12.i ]
+  %6 = add nsw i64 %indvars.iv.i42, %3
+  %arrayidx16.i = getelementptr inbounds float* %0, i64 %6
+  %7 = load float* %arrayidx16.i, align 4, !tbaa !0
+  %8 = shl i64 %indvars.iv.i42, 10
+  %9 = add nsw i64 %8, %indvars.iv88.i
+  %arrayidx20.i = getelementptr inbounds float* %1, i64 %9
+  %10 = load float* %arrayidx20.i, align 4, !tbaa !0
+  %mul21.i = fmul float %7, %10
+  %add26.i = fadd float %5, %mul21.i
+  %indvars.iv.next.i43 = add i64 %indvars.iv.i42, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next.i43 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end.i, label %for.body12.i
+
+; Compare the launched result at [i*1024 + j] with the reference value;
+; leave the loops through %if.else unless |reference - result| < 1e-3.
+for.end.i:  ; preds = %for.body12.i
+  %11 = add nsw i64 %indvars.iv88.i, %3
+  ;%arrayidx34.i = getelementptr inbounds float* %2, i64 %11
+  %arrayidx34.i = getelementptr inbounds float* %out.h_C, i64 %11
+  %12 = load float* %arrayidx34.i, align 4, !tbaa !0
+  %sub.i.i = fsub float %add26.i, %12
+  %fabsf.i.i = tail call float @fabsf(float %sub.i.i) #6
+  %13 = fpext float %fabsf.i.i to double
+  %cmp.i.i = fcmp olt double %13, 1.000000e-03
+  %indvars.iv.next89.i = add i64 %indvars.iv88.i, 1
+  br i1 %cmp.i.i, label %for.cond4.i, label %if.else
+
+; Latch of the row loop: advance i and continue while i < 1024.
+for.inc50.i:  ; preds = %for.cond4.i
+  %indvars.iv.next93.i = add i64 %indvars.iv92.i, 1
+  %inc51.i = add nsw i32 %i.081.i, 1
+  %14 = trunc i64 %indvars.iv.next93.i to i32
+  %cmp.i = icmp slt i32 %14, 1024
+  br i1 %cmp.i, label %for.cond4.preheader.i, label %if.then
+
+; Every element was within tolerance.
+if.then:  ; preds = %for.inc50.i
+  %puts31 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str27, i64 0, i64 0))
+  br label %if.end
+
+; First out-of-tolerance element: print i, j, the result value and the
+; reference value, then a second status string.
+if.else:  ; preds = %for.end.i
+  %conv40.i = fpext float %12 to double
+  %conv45.i = fpext float %add26.i to double
+  %call46.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @.str1, i64 0, i64 0), i32 %i.081.i, i32 %j.079.i, double %conv40.i, double %conv45.i) #4
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @str, i64 0, i64 0))
+  br label %if.end
+
+; Common exit: print the final string and release the three host buffers.
+if.end:  ; preds = %if.else, %if.then
+  %puts30 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str26, i64 0, i64 0))
+  tail call void @free(i8* %call) #4
+  tail call void @free(i8* %call7) #4
+  tail call void @free(i8* %call12) #4
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @srand(i32) #1
+
+; Function Attrs: nounwind
+declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture, ...) #1
+
+; Function Attrs: noreturn nounwind
+declare void @exit(i32) #5
+
+declare float @fabsf(float)
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture) #4
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #6 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #7 = { noreturn nounwind }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"any pointer", metadata !1}
-- GitLab