From cd094866b75dd22fca06210191c3c55a6202be4d Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <psrivas2@illinois.edu>
Date: Sun, 30 Nov 2014 17:36:10 +0000
Subject: [PATCH] Checkpoint commit for visc_mri-q.ll benchmark. First cut

---
Review notes (free text between the "---" marker and the diff; not part of
the commit message):
- Marks the pointer arguments of computeQ_kernel / computeQ_internal /
  computeQ_root with in/out attributes and replaces the direct call to
  computeQ_kernel in computeQ with a launch of computeQ_root followed by a wait.
- The launch-argument GEPs index through %struct.arg.Q*, the type %in.addr is
  allocated with; they previously named %struct.arg*.
- %73 is loaded from %DimQGrid and %74 from %DimQBlock, so %74 is stored to the
  DimQBlock field and %73 to the DimQGrid field (the two stores were swapped).
- The postimage blob hash on the "index" line predates these two fixes.
- NOTE(review): the alloca is emitted mid-function; if this code sits inside a
  loop, consider hoisting it to the entry block -- confirm against the full file.

 .../benchmarks/mri-q/src/visc/visc_mri-q.ll   | 59 +++++++++++++++++--
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
index a767cca31e..11213492f0 100644
--- a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
+++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
@@ -512,7 +512,7 @@ define void @computePhiMag(i32 %numK, float* %phiR, float* %phiI, float* %phiMag
 }
 
 ; Function Attrs: noinline nounwind uwtable
-define %rtype @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck) #4 {
+define %rtype @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck) #4 {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca float*, align 8
@@ -828,7 +828,7 @@ declare double @sin(double) #5
 
 
 ; ----------------- Compute Q internal node ----------------
-define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock) #4 {
+define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck, i64 %DimQBlock) #4 {
   %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64)* @computeQ_kernel to i8*), i64 %DimQBlock)
   ; Bind Inputs
   call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
@@ -851,7 +851,7 @@ define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %b
 }
 
 ; ----------------- Compute Q root node ----------------
-define %rtype @computeQ_root(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock, i64 %DimQGrid) #4 {
+define %rtype @computeQ_root(i32 %numK, i32 %kGlobalIndex, float* in %x, i64 %bytes_x, float* in %y, i64 %bytes_y, float* in %z, i64 %bytes_z, float* out %Qr, i64 %bytes_Qr, float* out %Qi, i64 %bytes_Qi, %struct.kValues* in %ck, i64 %bytes_ck, i64 %DimQBlock, i64 %DimQGrid) #4 {
   %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64)* @computeQ_internal to i8*), i64 %DimQGrid)
   ; Bind Inputs
   call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
@@ -995,9 +995,60 @@ define void @computeQ(i32 %numK, i32 %numX, float* %x, float* %y, float* %z, %st
   %70 = load i64* %bytes_x, align 8
   %71 = load %struct.kValues** %kValsTile, align 8
   %72 = load i64* %bytes_kValTile, align 8
-  call void @computeQ_kernel(i32 %59, i32 %60, float* %61, i64 %62, float* %63, i64 %64, float* %65, i64 %66, float* %67, i64 %68, float* %69, i64 %70, %struct.kValues* %71, i64 %72)
   %73 = load i64* %DimQGrid, align 8
   %74 = load i64* %DimQBlock, align 8
+  ; ---------------------------------- Adding VISC Launch Call --------------------------------
+  ; Replaced - call void @computeQ_kernel(i32 %59, i32 %60, float* %61, i64 %62, float* %63, i64 %64, float* %65, i64 %66, float* %67, i64 %68, float* %69, i64 %70, %struct.kValues* %71, i64 %72)
+  ; Setting up launch input args
+  %in.addr = alloca %struct.arg.Q
+
+  ; Field addresses inside the argument struct, in computeQ_root parameter order
+  %in.addr.numK = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 0
+  %in.addr.kGlobalIndex = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 1
+  %in.addr.x = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 2
+  %in.addr.bytes_x = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 3
+  %in.addr.y = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 4
+  %in.addr.bytes_y = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 5
+  %in.addr.z = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 6
+  %in.addr.bytes_z = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 7
+  %in.addr.Qr = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 8
+  %in.addr.bytes_Qr = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 9
+  %in.addr.Qi = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 10
+  %in.addr.bytes_Qi = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 11
+  %in.addr.ck = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 12
+  %in.addr.bytes_ck = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 13
+  %in.addr.DimQBlock = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 14
+  %in.addr.DimQGrid = getelementptr %struct.arg.Q* %in.addr, i32 0, i32 15
+  ; Store arguments (%73 holds DimQGrid and %74 holds DimQBlock, see the loads above)
+  store i32 %59, i32* %in.addr.numK
+  store i32 %60, i32* %in.addr.kGlobalIndex
+  store float* %61, float** %in.addr.x
+  store i64 %62, i64* %in.addr.bytes_x
+  store float* %63, float** %in.addr.y
+  store i64 %64, i64* %in.addr.bytes_y
+  store float* %65, float** %in.addr.z
+  store i64 %66, i64* %in.addr.bytes_z
+  store float* %67, float** %in.addr.Qr
+  store i64 %68, i64* %in.addr.bytes_Qr
+  store float* %69, float** %in.addr.Qi
+  store i64 %70, i64* %in.addr.bytes_Qi
+  store %struct.kValues* %71, %struct.kValues** %in.addr.ck
+  store i64 %72, i64* %in.addr.bytes_ck
+  store i64 %74, i64* %in.addr.DimQBlock
+  store i64 %73, i64* %in.addr.DimQGrid
+
+  ; Change type to i8* and VISC launch call to computeQ_root
+  %args = bitcast %struct.arg.Q* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64, i64)* @computeQ_root to i8*), i8* %args)
+
+  ; Wait for result
+  call void @llvm.visc.wait(i8* %graphID)
+
+  ; Get the result
+  ; -- Not required as all output is through side effects --
+  ; -------------------------------- Completed VISC Launch Call --------------------------------
+
+
   %75 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str4, i32 0, i32 0), i64 %73, i64 %74)
   br label %76
 
-- 
GitLab