MatrixMul working with X86

4273d700 · Prakalp Srivastava · ead1071c · 4273d700
Commit 4273d700 authored 10 years ago by Prakalp Srivastava
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
@@ -6,6 +6,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
+@custom_str = private unnamed_addr constant [12 x i8] c"Value = %d\0A\00", align 1
+@hex_str = private unnamed_addr constant [14 x i8] c"Value = 0x%x\0A\00", align 1
 @.str = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
 @.str2 = private unnamed_addr constant [28 x i8] c"Computing element (%d, %d)\0A\00", align 1
 @.str3 = private unnamed_addr constant [32 x i8] c"Accessing k = %d, A[%d], B[%d]\0A\00", align 1
@@ -130,7 +132,7 @@ declare i32 @printf(i8* nocapture, ...) #1
 ; --------------- VISC Intrinsics ---------------
 ; Return Type of VISC Compute Matrix Mul
 %rtype = type {float*, i32}
-%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }
+%struct.arg = type <{ float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }>
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
@@ -178,7 +180,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
 entry:
-  %puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
+  ;%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
  ; ------------------------- VISC changes ------------------
  ; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
@@ -190,7 +192,7 @@ entry:
  %call2 = call i32 @llvm.visc.getNodeInstanceID.y(i8* %this_node)
  ; ---------------------- VISC changes End ------------------
-  %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
+  ;%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
  %cmp44 = icmp eq i32 %k, 0
  br i1 %cmp44, label %for.end, label %for.body.lr.ph
@@ -205,7 +207,7 @@ for.body:                                         ; preds = %for.body, %for.body
  %add = add i32 %0, %mul
  %mul4 = mul i32 %0, %n
  %add5 = add i32 %mul4, %call1
-  %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
+  ;%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
  %idxprom = zext i32 %add to i64
  %arrayidx = getelementptr inbounds float* %A, i64 %idxprom
  %1 = load float* %arrayidx, align 4, !tbaa !0
@@ -221,14 +223,14 @@ for.body:                                         ; preds = %for.body, %for.body
 for.end:                                          ; preds = %for.body, %entry
  %res.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add14, %for.body ]
-  %puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
+  ;%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
  %mul16 = mul i32 %call2, %n
  %add17 = add i32 %mul16, %call1
  %idxprom18 = zext i32 %add17 to i64
  %arrayidx19 = getelementptr inbounds float* %C, i64 %idxprom18
  store float %res.0.lcssa, float* %arrayidx19, align 4, !tbaa !0
-  %puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
+  ;%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
-  %puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
+  ;%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
  %.fca.0.insert = insertvalue %rtype undef, float* %C, 0
  %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1
  ret %rtype %.fca.1.insert