Skip to content
Snippets Groups Projects
Commit 7dcb7f01 authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Added print statements to kernel

parent 12dd20e6
No related branches found
No related tags found
No related merge requests found
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
; RUN: %t.bin 5
; RUN: %t.bin
; ModuleID = 'gemm_opencl.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
%struct._cl_mem = type opaque
%struct._cl_platform_id = type opaque
%struct._cl_context = type opaque
%struct._cl_device_id = type opaque
%struct._cl_command_queue = type opaque
%struct._cl_program = type opaque
%struct._cl_kernel = type opaque
%struct._cl_event = type opaque
@.str1 = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
@str = private unnamed_addr constant [9 x i8] c"\0AFailed!\00"
@str26 = private unnamed_addr constant [7 x i8] c"\0ADone!\00"
......@@ -186,56 +175,65 @@ declare void @llvm.visc.bind.input(i8*, i32, i32)
declare void @llvm.visc.bind.output(i8*, i32, i32)
; ----------------- VISC intrinsics end ------------------
; Diagnostic strings printed from @matrixMul through @printf / @puts.
; printf format string; the call in @matrixMul passes two i32 values (%call1, %call2).
@.strce = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1
; Passed to puts as the first instruction of @matrixMul's entry block.
@stref = private unnamed_addr constant [17 x i8] c"Entered function\00"
; Passed to puts in for.end, before the index into C is computed.
@strrc = private unnamed_addr constant [16 x i8] c"Result computed\00"
; Passed to puts immediately after the store of the result into C.
@strrw = private unnamed_addr constant [20 x i8] c"Result written to C\00"
; Passed to puts just before the %rtype return value is assembled.
@stroa = private unnamed_addr constant [17 x i8] c"Output allocated\00"
; Function Attrs: nounwind uwtable
define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
entry:
%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @stref, i64 0, i64 0))
; ------------------------- VISC changes ------------------
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
; Replaced statement -- %call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
; Replaced statement -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #3
%this_node = call i8* @llvm.visc.getNode()
%call = tail call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
%call1 = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.y
; Replaced statement -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2
%call1 = tail call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node)
; Replaced statement -- %call2 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #3
%call2 = call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node)
; ---------------------- VISC changes End ------------------
%cmp22 = icmp sgt i32 %k, 0
br i1 %cmp22, label %for.body.lr.ph, label %for.end
%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.strce, i64 0, i64 0), i32 %call1, i32 %call2) #3
%cmp32 = icmp sgt i32 %k, 0
br i1 %cmp32, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
%mul = mul nsw i32 %call1, %k
%mul = mul nsw i32 %call2, %k
%0 = sext i32 %mul to i64
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%res.024 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ]
%res.034 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add9, %for.body ]
%1 = add nsw i64 %indvars.iv, %0
%arrayidx = getelementptr inbounds float* %A, i64 %1
%2 = load float* %arrayidx, align 4, !tbaa !0
%3 = trunc i64 %indvars.iv to i32
%mul2 = mul nsw i32 %3, %n
%add3 = add nsw i32 %mul2, %call
%idxprom4 = sext i32 %add3 to i64
%arrayidx5 = getelementptr inbounds float* %B, i64 %idxprom4
%4 = load float* %arrayidx5, align 4, !tbaa !0
%mul6 = fmul float %2, %4
%add7 = fadd float %res.024, %mul6
%mul4 = mul nsw i32 %3, %n
%add5 = add nsw i32 %mul4, %call1
%idxprom6 = sext i32 %add5 to i64
%arrayidx7 = getelementptr inbounds float* %B, i64 %idxprom6
%4 = load float* %arrayidx7, align 4, !tbaa !0
%mul8 = fmul float %2, %4
%add9 = fadd float %res.034, %mul8
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %k
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add7, %for.body ]
%mul8 = mul nsw i32 %call1, %n
%add9 = add nsw i32 %mul8, %call
%idxprom10 = sext i32 %add9 to i64
%arrayidx11 = getelementptr inbounds float* %C, i64 %idxprom10
store float %res.0.lcssa, float* %arrayidx11, align 4, !tbaa !0
%res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add9, %for.body ]
%puts29 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @strrc, i64 0, i64 0))
%mul11 = mul nsw i32 %call2, %n
%add12 = add nsw i32 %mul11, %call1
%idxprom13 = sext i32 %add12 to i64
%arrayidx14 = getelementptr inbounds float* %C, i64 %idxprom13
store float %res.0.lcssa, float* %arrayidx14, align 4, !tbaa !0
%puts30 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @strrw, i64 0, i64 0))
%puts31 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @stroa, i64 0, i64 0))
%.fca.0.insert = insertvalue %rtype undef, float* %C, 0
%.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1
ret %rtype %.fca.1.insert
......@@ -424,9 +422,6 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind
declare void @srand(i32) #1
; Function Attrs: nounwind
declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture, ...) #1
; Function Attrs: noreturn nounwind
declare void @exit(i32) #5
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment