Skip to content
Snippets Groups Projects
Commit 4273d700 authored by Prakalp Srivastava
Browse files

MatrixMul working with X86

parent ead1071c
No related branches found
No related tags found
No related merge requests found
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@custom_str = private unnamed_addr constant [12 x i8] c"Value = %d\0A\00", align 1
@hex_str = private unnamed_addr constant [14 x i8] c"Value = 0x%x\0A\00", align 1
@.str = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1 @.str = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
@.str2 = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1 @.str2 = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1
@.str3 = private unnamed_addr constant [32 x i8] c"Accessing k = %d, A[%d], B[%d]\0A\00", align 1 @.str3 = private unnamed_addr constant [32 x i8] c"Accessing k = %d, A[%d], B[%d]\0A\00", align 1
...@@ -130,7 +132,7 @@ declare i32 @printf(i8* nocapture, ...) #1 ...@@ -130,7 +132,7 @@ declare i32 @printf(i8* nocapture, ...) #1
; --------------- VISC Intrinsics --------------- ; --------------- VISC Intrinsics ---------------
; Return Type of VISC Compute Matrix Mul ; Return Type of VISC Compute Matrix Mul
%rtype = type {float*, i32} %rtype = type {float*, i32}
%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype } %struct.arg = type <{ float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }>
; Function Attrs: nounwind ; Function Attrs: nounwind
declare i8* @llvm.visc.launch(i8*, i8*) #0 declare i8* @llvm.visc.launch(i8*, i8*) #0
...@@ -178,7 +180,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32) ...@@ -178,7 +180,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
; Function Attrs: nounwind uwtable ; Function Attrs: nounwind uwtable
define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 { define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
entry: entry:
%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0)) ;%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
; ------------------------- VISC changes ------------------ ; ------------------------- VISC changes ------------------
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x ; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
...@@ -190,7 +192,7 @@ entry: ...@@ -190,7 +192,7 @@ entry:
%call2 = call i32 @llvm.visc.getNodeInstanceID.y(i8* %this_node) %call2 = call i32 @llvm.visc.getNodeInstanceID.y(i8* %this_node)
; ---------------------- VISC changes End ------------------ ; ---------------------- VISC changes End ------------------
%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5 ;%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
%cmp44 = icmp eq i32 %k, 0 %cmp44 = icmp eq i32 %k, 0
br i1 %cmp44, label %for.end, label %for.body.lr.ph br i1 %cmp44, label %for.end, label %for.body.lr.ph
...@@ -205,7 +207,7 @@ for.body: ; preds = %for.body, %for.body ...@@ -205,7 +207,7 @@ for.body: ; preds = %for.body, %for.body
%add = add i32 %0, %mul %add = add i32 %0, %mul
%mul4 = mul i32 %0, %n %mul4 = mul i32 %0, %n
%add5 = add i32 %mul4, %call1 %add5 = add i32 %mul4, %call1
%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5 ;%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
%idxprom = zext i32 %add to i64 %idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds float* %A, i64 %idxprom %arrayidx = getelementptr inbounds float* %A, i64 %idxprom
%1 = load float* %arrayidx, align 4, !tbaa !0 %1 = load float* %arrayidx, align 4, !tbaa !0
...@@ -221,14 +223,14 @@ for.body: ; preds = %for.body, %for.body ...@@ -221,14 +223,14 @@ for.body: ; preds = %for.body, %for.body
for.end: ; preds = %for.body, %entry for.end: ; preds = %for.body, %entry
%res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add14, %for.body ] %res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add14, %for.body ]
%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0)) ;%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
%mul16 = mul i32 %call2, %n %mul16 = mul i32 %call2, %n
%add17 = add i32 %mul16, %call1 %add17 = add i32 %mul16, %call1
%idxprom18 = zext i32 %add17 to i64 %idxprom18 = zext i32 %add17 to i64
%arrayidx19 = getelementptr inbounds float* %C, i64 %idxprom18 %arrayidx19 = getelementptr inbounds float* %C, i64 %idxprom18
store float %res.0.lcssa, float* %arrayidx19, align 4, !tbaa !0 store float %res.0.lcssa, float* %arrayidx19, align 4, !tbaa !0
%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0)) ;%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0)) ;%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
%.fca.0.insert = insertvalue %rtype undef, float* %C, 0 %.fca.0.insert = insertvalue %rtype undef, float* %C, 0
%.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1 %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1
ret %rtype %.fca.1.insert ret %rtype %.fca.1.insert
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment