From cd094866b75dd22fca06210191c3c55a6202be4d Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <psrivas2@illinois.edu>
Date: Sun, 30 Nov 2014 17:36:10 +0000
Subject: [PATCH] Checkpoint commit for visc_mri-q.ll benchmark. First cut

---
 .../benchmarks/mri-q/src/visc/visc_mri-q.ll   | 59 +++++++++++++++++--
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
index a767cca31e..11213492f0 100644
--- a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
+++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
@@ -512,7 +512,7 @@ define void @computePhiMag(i32 %numK, float* %phiR, float* %phiI, float* %phiMag
 }
 
 ; Function Attrs: noinline nounwind uwtable
-define %rtype @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck) #4 {
+define %rtype @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck) #4 {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca float*, align 8
@@ -828,7 +828,7 @@ declare double @sin(double) #5
 
 
 ; ----------------- Compute Q internal node ----------------
-define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock) #4 {
+define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck, i64 %DimQBlock) #4 {
   %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64)* @computeQ_kernel to i8*), i64 %DimQBlock)
   ; Bind Inputs
   call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
@@ -851,7 +851,7 @@ define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %b
 }
 
 ; ----------------- Compute Q root node ----------------
-define %rtype @computeQ_root(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock, i64 %DimQGrid) #4 {
+define %rtype @computeQ_root(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck, i64 %DimQBlock, i64 %DimQGrid) #4 {
   %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64)* @computeQ_internal to i8*), i64 %DimQGrid)
   ; Bind Inputs
   call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
@@ -995,9 +995,60 @@ define void @computeQ(i32 %numK, i32 %numX, float* %x, float* %y, float* %z, %st
   %70 = load i64* %bytes_x, align 8
   %71 = load %struct.kValues** %kValsTile, align 8
   %72 = load i64* %bytes_kValTile, align 8
-  call void @computeQ_kernel(i32 %59, i32 %60, float* %61, i64 %62, float* %63, i64 %64, float* %65, i64 %66, float* %67, i64 %68, float* %69, i64 %70, %struct.kValues* %71, i64 %72)
   %73 = load i64* %DimQGrid, align 8
   %74 = load i64* %DimQBlock, align 8
+  ; ---------------------------------- Adding VISC Launch Call --------------------------------
+  ; Replaced - call void @computeQ_kernel(i32 %59, i32 %60, float* %61, i64 %62, float* %63, i64 %64, float* %65, i64 %66, float* %67, i64 %68, float* %69, i64 %70, %struct.kValues* %71, i64 %72)
+  ; Pack the launch arguments into a stack-allocated %struct.arg.Q (assumed to mirror @computeQ_root's parameter list; its definition is not in this hunk)
+  %in.addr = alloca %struct.arg.Q
+
+  ; Field addresses, one per @computeQ_root parameter in order; the pointer type must match the alloca above (%struct.arg.Q*)
+  %in.addr.numK = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 0
+  %in.addr.kGlobalIndex = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 1
+  %in.addr.x = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 2
+  %in.addr.bytes_x = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 3
+  %in.addr.y = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 4
+  %in.addr.bytes_y = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 5
+  %in.addr.z = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 6
+  %in.addr.bytes_z = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 7
+  %in.addr.Qr = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 8
+  %in.addr.bytes_Qr = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 9
+  %in.addr.Qi = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 10
+  %in.addr.bytes_Qi = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 11
+  %in.addr.ck = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 12
+  %in.addr.bytes_ck = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 13
+  %in.addr.DimQBlock = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 14
+  %in.addr.DimQGrid = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 15
+  ; Store arguments (same values, same order as the replaced direct call). Note: %73 was loaded from %DimQGrid and %74 from %DimQBlock
+  store i32 %59, i32* %in.addr.numK
+  store i32 %60, i32* %in.addr.kGlobalIndex
+  store float* %61, float** %in.addr.x
+  store i64 %62, i64* %in.addr.bytes_x
+  store float* %63, float** %in.addr.y
+  store i64 %64, i64* %in.addr.bytes_y
+  store float* %65, float** %in.addr.z
+  store i64 %66, i64* %in.addr.bytes_z
+  store float* %67, float** %in.addr.Qr
+  store i64 %68, i64* %in.addr.bytes_Qr
+  store float* %69, float** %in.addr.Qi
+  store i64 %70, i64* %in.addr.bytes_Qi
+  store %struct.kValues* %71, %struct.kValues** %in.addr.ck
+  store i64 %72, i64* %in.addr.bytes_ck
+  store i64 %74, i64* %in.addr.DimQBlock
+  store i64 %73, i64* %in.addr.DimQGrid
+
+  ; Change type to i8* and VISC launch call to computeQ_root
+  %args = bitcast %struct.arg.Q* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64, i64)* @computeQ_root to i8*), i8* %args)
+
+  ; Wait for result
+  call void @llvm.visc.wait(i8* %graphID)
+
+  ; Get the result
+  ; -- Not required as all output is through side effects --
+  ; -------------------------------- Completed VISC Launch Call --------------------------------
+ 
+
   %75 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str4, i32 0, i32 0), i64 %73, i64 %74)
   br label %76
 
-- 
GitLab