Skip to content
Snippets Groups Projects
Commit 4273d700 authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

MatrixMul working with X86

parent ead1071c
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@custom_str = private unnamed_addr constant [12 x i8] c"Value = %d\0A\00", align 1
@hex_str = private unnamed_addr constant [14 x i8] c"Value = 0x%x\0A\00", align 1
@.str = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
@.str2 = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1
@.str3 = private unnamed_addr constant [32 x i8] c"Accessing k = %d, A[%d], B[%d]\0A\00", align 1
......@@ -130,7 +132,7 @@ declare i32 @printf(i8* nocapture, ...) #1
; --------------- VISC Intrinsics ---------------
; Return Type of VISC Compute Matrix Mul
%rtype = type {float*, i32}
%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }
%struct.arg = type <{ float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }>
; Function Attrs: nounwind
declare i8* @llvm.visc.launch(i8*, i8*) #0
......@@ -178,7 +180,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
; Function Attrs: nounwind uwtable
define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
entry:
%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
;%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
; ------------------------- VISC changes ------------------
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
......@@ -190,7 +192,7 @@ entry:
%call2 = call i32 @llvm.visc.getNodeInstanceID.y(i8* %this_node)
; ---------------------- VISC changes End ------------------
%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
;%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
%cmp44 = icmp eq i32 %k, 0
br i1 %cmp44, label %for.end, label %for.body.lr.ph
......@@ -205,7 +207,7 @@ for.body: ; preds = %for.body, %for.body
%add = add i32 %0, %mul
%mul4 = mul i32 %0, %n
%add5 = add i32 %mul4, %call1
%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
;%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
%idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds float* %A, i64 %idxprom
%1 = load float* %arrayidx, align 4, !tbaa !0
......@@ -221,14 +223,14 @@ for.body: ; preds = %for.body, %for.body
for.end: ; preds = %for.body, %entry
%res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add14, %for.body ]
%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
;%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
%mul16 = mul i32 %call2, %n
%add17 = add i32 %mul16, %call1
%idxprom18 = zext i32 %add17 to i64
%arrayidx19 = getelementptr inbounds float* %C, i64 %idxprom18
store float %res.0.lcssa, float* %arrayidx19, align 4, !tbaa !0
%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
;%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
;%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
%.fca.0.insert = insertvalue %rtype undef, float* %C, 0
%.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1
ret %rtype %.fca.1.insert
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment