From 293ca7801308882245b8ff5aab52d169e6c94dec Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <prakalps@gmail.com>
Date: Thu, 18 Jun 2015 20:11:55 -0500
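Subject: [PATCH] Modified makefiles to add _platform to the build dir

The Parboil benchmark Makefiles now build into build/$(VERSION)_$(PLATFORM)
(PLATFORM falls back to "default" when empty), and the hard-coded kernel
paths in the OpenCL host sources are updated to the matching *_default
directories.

Also:
- opencl.mk: add rules that build kernel .cl sources into .nvptx.s.
- visc.mk: make "clean" remove the build directory with rm -rf.
- MatrixMultiplication tests: link with -lrt and bracket the launch with
  llvm.visc.init()/llvm.visc.cleanup() calls.
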
---
 .../VISC/MatrixMultiplication/visc_gemm.ll    | 10 ++++++++-
 .../MatrixMultiplication/visc_gemm_2_level.ll | 10 ++++++++-
 .../visc_gemm_2_level_host.ll                 | 10 ++++++++-
 .../visc_gemm_2_level_outedge.ll              | 10 ++++++++-
 .../visc_gemm_2_level_param.ll                | 10 ++++++++-
 .../MatrixMultiplication/visc_gemm_ptx.ll     | 10 ++++++++-
 .../test/VISC/parboil/benchmarks/lbm/Makefile |  6 ++++-
 .../benchmarks/lbm/src/opencl_nvidia/main.c   |  2 +-
 .../VISC/parboil/benchmarks/sgemm/Makefile    |  6 ++++-
 .../benchmarks/sgemm/src/opencl_base/main.cc  |  2 +-
 .../VISC/parboil/benchmarks/spmv/Makefile     |  7 +++++-
 .../benchmarks/spmv/src/opencl_nvidia/main.c  |  2 +-
 .../VISC/parboil/benchmarks/stencil/Makefile  |  6 ++++-
 .../benchmarks/stencil/src/opencl_base/main.c |  2 +-
 llvm/test/VISC/parboil/common/mk/opencl.mk    | 22 +++++++++++++++++--
 llvm/test/VISC/parboil/common/mk/visc.mk      |  2 +-
 16 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
index cb16f00949..033b481af7 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -134,6 +134,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -329,6 +335,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -362,6 +369,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
index a13abfa8f5..ed3e3bf098 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -371,6 +377,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -404,6 +411,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
index d04b5a7f51..fc3db521db 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -135,6 +135,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -369,6 +375,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -402,6 +409,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
index a7f751a52c..b51727e2a6 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -138,6 +138,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -380,6 +386,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -418,6 +425,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   
   %printcall0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %output_0) #5
   %printcall1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %output_1) #5
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
index 69db224a41..30ccd8cc4c 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -372,6 +378,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -409,6 +416,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 11
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
index b7a5da1848..0c3bc24f9d 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -332,6 +338,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -365,6 +372,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/Makefile b/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
index 525e0e680f..b4a0a04550 100644
--- a/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
@@ -11,10 +11,14 @@ ifeq ($(TEST),)
   TEST = short
 endif
 
+ifeq ($(PLATFORM),)
+PLATFORM=default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),long)
diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
index 074e5ca195..7894ea9b70 100644
--- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
@@ -230,5 +230,5 @@ void OpenCL_initialize(OpenCL_Param* prm)
 
     //free((void*)clSource[0]);
 
-    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia_default/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel);
 }
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile b/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
index 23bf006357..b81433c83d 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
@@ -11,10 +11,14 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+ifeq ($(PLATFORM),)
+PLATFORM=default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
index 025a430321..47ac5c4271 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
@@ -125,7 +125,7 @@ int main (int argc, char *argv[]) {
   // cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
   cl_kernel clKernel;
   cl_program clProgram;
-  pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel);
+  pb_CreateAndBuildKernelFromBinary("build/opencl_base_default/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel);
   //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
   //CHECK_ERROR("clCreateProgramWithSource")
 
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/Makefile b/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
index 48ac1272af..64c0083a0b 100644
--- a/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
@@ -11,10 +11,15 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+# Build output is kept per platform; use "default" when PLATFORM is empty or unset.
+ifeq ($(PLATFORM),)
+PLATFORM=default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
index 62bee2cf96..87088bf98c 100644
--- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
@@ -78,7 +78,7 @@ int main(int argc, char** argv) {
     //CHECK_ERROR("clCreateKernel")
     cl_kernel clKernel;
     cl_program clProgram;
-    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia_default/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel);
 
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
     //parameters declaration
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/Makefile b/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
index fc298d2607..01de9b51b2 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
@@ -11,10 +11,14 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+ifeq ($(PLATFORM),)
+PLATFORM=default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
index 92584fa10f..5238110bae 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
@@ -106,7 +106,7 @@ int main(int argc, char** argv) {
     cl_program clProgram;
     cl_kernel clKernel;
 
-    pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_base_default/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
     //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
     //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
     //CHECK_ERROR("clCreateProgramWithSource")
diff --git a/llvm/test/VISC/parboil/common/mk/opencl.mk b/llvm/test/VISC/parboil/common/mk/opencl.mk
index 5d476be3f7..7c3ed00d10 100644
--- a/llvm/test/VISC/parboil/common/mk/opencl.mk
+++ b/llvm/test/VISC/parboil/common/mk/opencl.mk
@@ -10,6 +10,13 @@ CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS)
 CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS)
 LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS)
 
+
+LLVM_INSTALL:=$(LLVM_SRC_ROOT)/Release+Asserts
+LIBCLC:=$(LLVM_SRC_ROOT)/../libclc
+LLVM_CC:=$(LLVM_INSTALL)/bin/clang
+LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link
+
+
 # Rules common to all makefiles
 
 ########################################
@@ -39,6 +46,7 @@ $(error $$BUILDDIR is not set correctly)
 endif
 
 .PHONY: run
+.PRECIOUS: $(BUILDDIR)/%.ll
 
 ifeq ($(OPENCL_PATH),)
 FAILSAFE=no_opencl
@@ -51,6 +59,7 @@ endif
 ########################################
 
 OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
+KERNEL = $(call INBUILDDIR,$(KERNEL_OBJS))
 
 ifeq ($(DEBUGGER),)
 DEBUGGER=gdb
@@ -60,7 +69,7 @@ endif
 # Rules
 ########################################
 
-default: $(FAILSAFE) $(BUILDDIR) $(BIN)
+default: $(FAILSAFE) $(BUILDDIR) $(BIN) $(KERNEL)
 
 run : $(RUNDIR)
 	echo "Resolving OpenCL library..."
@@ -78,7 +87,7 @@ clean :
 	if [ -a $(BIN) ]; then rm $(BIN); fi
 	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
 
-$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o
+$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o
 	$(CXX) $^ -o $@ $(LDFLAGS)
 
 $(RUNDIR) :
@@ -99,6 +108,20 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.cc
 $(BUILDDIR)/%.o : $(SRCDIR)/%.cpp
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+# Lower the linked kernel bitcode to NVPTX assembly.
+$(BUILDDIR)/%.nvptx.s : $(BUILDDIR)/%.linked.bc
+	$(LLVM_CC) -O3 -target nvptx $< -S -o $@
+
+# Link the kernel IR with libclc's built_libs/nvptx--nvidiacl.bc.
+$(BUILDDIR)/%.linked.bc : $(BUILDDIR)/%.ll
+	$(LLVM_LINK) $(LIBCLC)/built_libs/nvptx--nvidiacl.bc $< -o $@
+
+# Compile an OpenCL C source to LLVM IR with the libclc headers, then strip every
+# " addrspace(N)" qualifier in place. NOTE(review): reason for stripping is not visible here; confirm.
+$(BUILDDIR)/%.ll : $(SRCDIR)/%.cl
+	$(LLVM_CC) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/generic/include -include clc/clc.h -target nvptx--nvidiacl $< -O3 -emit-llvm -S -o $@
+	sed -i -e 's/ addrspace([0-9][0-9]*)//g' $@
+
 no_opencl:
 	@echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value."
 	@echo "You may use $(PLATFORM_MK) if you want a platform specific configurations."
diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk
index 8e14b9cb57..4445260532 100644
--- a/llvm/test/VISC/parboil/common/mk/visc.mk
+++ b/llvm/test/VISC/parboil/common/mk/visc.mk
@@ -123,7 +123,7 @@ clean :
 	rm -rf $(BUILDDIR)/*
 	if [ -a $(BIN) ]; then rm $(BIN); fi
 	if [ -a DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
+	if [ -d $(BUILDDIR) ]; then rm -rf $(BUILDDIR); fi
 
 #$(APP_BINS) : $(PTX_ASSEMBLY) $(BIN)
 #	echo Generating $(APP_BINS) ...
-- 
GitLab