Skip to content
Snippets Groups Projects
Commit a12e4c7b authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Example visc_sgemm almost complete

parent bdf25051
No related branches found
No related tags found
No related merge requests found
......@@ -19,7 +19,7 @@ target triple = "x86_64-unknown-linux-gnu"
; Return Type of VISC Compute Matrix Mul
; Two-field aggregate: a float pointer and an i32. @matrixMul builds its
; return value from its %C and %bytes_C arguments in that order.
%rtype = type {float*, i32}
; NOTE(review): %struct.arg is defined twice below. This text comes from a
; commit view, so the 5-field form is presumably the pre-change definition and
; the 10-field form the current one -- confirm that only one exists in the
; real .ll file.
%struct.arg = type { float*, i32, float*, i32, %rtype }
; Argument record whose address is bitcast to i8* and handed to
; @llvm.visc.launch. The launch code addresses fields 1..8 as bytes_A, h_B,
; bytes_B, h_C, bytes_C, WA, WB and HA; field 0 is presumably h_A and the
; trailing %rtype presumably receives the graph result (neither use is visible
; in this excerpt -- verify).
%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }
; Function Attrs: nounwind
; Launch intrinsic: the call site passes the root graph function bitcast to i8*
; and the i8* view of a %struct.arg, and keeps the returned i8* as %graphID.
declare i8* @llvm.visc.launch(i8*, i8*) #0
......@@ -542,18 +542,65 @@ declare i32 @clReleaseProgram(%struct._cl_program*) #3
declare i32 @clReleaseCommandQueue(%struct._cl_command_queue*) #3
define %rtype @MatrixMulRoot(float* h_A, i32 bytes_A, float* h_B, i32 bytes_B, i32 WA, i32 WB, i32 HA) {
%kernel = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (float*, i32, float*, i32, i32, i32)* @matrixMul to i8*), i32 WB, i32 HA)
; matrixMul -- per-work-item kernel computing one element of C = A * B.
;
; With col = get_global_id(0) and row = get_global_id(1), the body computes
;     C[row*n + col] = sum over i in [0, k) of A[row*k + i] * B[i*n + col]
; i.e. all three matrices are indexed as flat row-major float arrays.
;
; Params:
;   %A, %B             input matrices (read only in this body)
;   %C                 output matrix; exactly one float is stored per call
;   %bytes_A, %bytes_B not referenced in the body
;   %bytes_C           only forwarded into the return value
;   %k                 loop trip count (inner dimension)
;   %n                 row stride used for both B and C
;   %m                 not referenced in the body
; Returns: { %C, %bytes_C } packed into the { float*, i32 } aggregate.
;
; NOTE(review): row/col are never compared against %m/%n, so the launch
; geometry must already match the matrix dimensions -- confirm at the caller.
; Function Attrs: nounwind uwtable
define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
entry:
; %call = global id in dimension 0 (used as the column index),
; %call1 = global id in dimension 1 (used as the row index).
%call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
%call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2
; Skip the loop entirely when k <= 0; the stored result is then 0.0.
%cmp22 = icmp sgt i32 %k, 0
br i1 %cmp22, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
; Loop-invariant offset of the current row inside A: row * k, widened to i64.
%mul = mul nsw i32 %call1, %k
%0 = sext i32 %mul to i64
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
; %indvars.iv is the 64-bit induction variable i; %res.024 is the running sum.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%res.024 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ]
; Load A[row*k + i].
%1 = add nsw i64 %indvars.iv, %0
%arrayidx = getelementptr inbounds float* %A, i64 %1
%2 = load float* %arrayidx, align 4, !tbaa !0
; Load B[i*n + col]; the index is computed in 32 bits and then sign-extended.
%3 = trunc i64 %indvars.iv to i32
%mul2 = mul nsw i32 %3, %n
%add3 = add nsw i32 %mul2, %call
%idxprom4 = sext i32 %add3 to i64
%arrayidx5 = getelementptr inbounds float* %B, i64 %idxprom4
%4 = load float* %arrayidx5, align 4, !tbaa !0
; Accumulate the product into the running sum.
%mul6 = fmul float %2, %4
%add7 = fadd float %res.024, %mul6
; Advance i and leave the loop once the truncated counter equals k.
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %k
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Final sum: 0.0 when the loop was skipped, otherwise the last %add7.
%res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add7, %for.body ]
; Store the result to C[row*n + col].
%mul8 = mul nsw i32 %call1, %n
%add9 = add nsw i32 %mul8, %call
%idxprom10 = sext i32 %add9 to i64
%arrayidx11 = getelementptr inbounds float* %C, i64 %idxprom10
store float %res.0.lcssa, float* %arrayidx11, align 4, !tbaa !0
; Build and return the { C pointer, bytes_C } pair.
%.fca.0.insert = insertvalue { float*, i32 } undef, float* %C, 0
%.fca.1.insert = insertvalue { float*, i32 } %.fca.0.insert, i32 %bytes_C, 1
ret { float*, i32 } %.fca.1.insert
}
; MatrixMulRoot -- root graph function passed to @llvm.visc.launch.
;
; Creates a single 2D node running @matrixMul, with %WB and %HA passed as the
; two dimension arguments, then issues bind.input calls pairing index i with
; index i for i = 0..8 and bind.output calls for indices 0 and 1.
; Presumably the two indices mean (root argument position, kernel argument
; position) for inputs and (kernel result field, root result field) for
; outputs -- confirm against the VISC intrinsic definitions.
;
; Apart from %WB and %HA, the parameters are never referenced by name in this
; body; they are mentioned only through the bind indices.
; The function itself returns an all-zero %rtype.
;
; NOTE(review): the bind.input calls for indices 0..5 and the bind.output calls
; for indices 0..1 each appear twice below. This text comes from a commit view,
; so the uncommented copies are presumably the pre-change lines -- confirm that
; the real file contains each bind only once.
define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C, i32 %WA, i32 %WB, i32 %HA) {
; 2D kernel node: function pointer erased to i8*, followed by the two extents.
%kernel = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (float*, i32, float*, i32, float*, i32, i32, i32, i32)* @matrixMul to i8*), i32 %WB, i32 %HA)
; Bind Inputs
call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0)
call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1)
call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2)
call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3)
call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4)
call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5)
call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); h_A
call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); bytes_A
call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); h_B
call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_B
call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); h_C
call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_C
call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); WA = HB = k
call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); WB = WC = n
call void @llvm.visc.bind.input(i8* %kernel, i32 8, i32 8); HA = HC = m
; Bind Outputs
call void @llvm.visc.bind.output(i8* %kernel, i32 0, i32 0)
call void @llvm.visc.bind.output(i8* %kernel, i32 1, i32 1)
call void @llvm.visc.bind.output(i8* %kernel, i32 0, i32 0); d_C
call void @llvm.visc.bind.output(i8* %kernel, i32 1, i32 1); bytes_C
; The value returned here is a zero placeholder, not the kernel's result.
ret %rtype zeroinitializer
}
......@@ -607,14 +654,25 @@ randomInit.exit41: ; preds = %for.body.i40
%in.addr.bytes_A = getelementptr %struct.arg* %in.addr, i32 0, i32 1
%in.addr.h_B = getelementptr %struct.arg* %in.addr, i32 0, i32 2
%in.addr.bytes_B = getelementptr %struct.arg* %in.addr, i32 0, i32 3
%in.addr.h_C = getelementptr %struct.arg* %in.addr, i32 0, i32 4
%in.addr.bytes_C = getelementptr %struct.arg* %in.addr, i32 0, i32 5
%in.addr.WA = getelementptr %struct.arg* %in.addr, i32 0, i32 6
%in.addr.WB = getelementptr %struct.arg* %in.addr, i32 0, i32 7
%in.addr.HA = getelementptr %struct.arg* %in.addr, i32 0, i32 8
store float* %0, float** %in.addr.h_A
store i32 4194304, i32* %in.addr.bytes_A
store float* %1, float** %in.addr.h_B
store i32 4194304, i32* %in.addr.bytes_B
store float* %2, float** %in.addr.h_C
store i32 4194304, i32* %in.addr.bytes_C
store i32 1024, i32* %in.addr.WA
store i32 1024, i32* %in.addr.WB
store i32 1024, i32* %in.addr.HA
; Change type to i8* and VISC Launch call
%args = bitcast %struct.arg* %in.addr to i8*
%graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i32, float*, i32)* @MatrixMulRoot to i8*), i8* %args)
%graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i32, float*, i32, float*, i32, i32, i32, i32)* @MatrixMulRoot to i8*), i8* %args)
;tail call void @computeMatrixMul(float* %0, i32 4194304, float* %1, i32 4194304, float* %2, i32 4194304)
; Wait for result
......@@ -627,7 +685,7 @@ randomInit.exit41: ; preds = %for.body.i40
;%2 = extractvalue %rtype %out, 0
%out.bytes_C = extractvalue %rtype %out, 1
; -------------------------------- Completed VISC Launch Call Cod --------------------------------
; -------------------------------- Completed VISC Launch Call --------------------------------
br label %for.cond4.preheader.i
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment