diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
index cb16f00949ff54ebf50d70f2fde6b93ef1e9324d..033b481af786dd936b0fbe383f3723d5faf237c6 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -134,6 +134,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -329,6 +335,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -362,6 +369,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
index a13abfa8f5e0cdd9861adbac08aa0817ccb4c03a..ed3e3bf0985c24ac5785137be91e67ac298093b4 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -371,6 +377,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -404,6 +411,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
index d04b5a7f51599408b62fa27b254acc0d7eb1b6a4..fc3db521db174e58626a5c4daf109061530bb250 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_host.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -135,6 +135,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -369,6 +375,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -402,6 +409,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
index a7f751a52ceab9066eabc4e943ea59efcefa5a93..b51727e2a674de58cb83cbba2baf024b7aaebfab 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_outedge.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -138,6 +138,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -380,6 +386,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -418,6 +425,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   
   %printcall0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %output_0) #5
   %printcall1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @custom_str, i64 0, i64 0), i32 %output_1) #5
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
index 69db224a413206727cbecc8a542dc0b353dfe6ff..30ccd8cc4c33e9287a0673c9ce537e6ccfb24b21 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_2_level_param.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -372,6 +378,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -409,6 +416,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 11
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll b/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
index b7a5da184865ca469ec41a0c75ab82798f941585..0c3bc24f9dc5575783e3002115cc8976dfb3325a 100644
--- a/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
+++ b/llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-link %llvm_src/../libclc/built_libs/nvptx--nvidiacl.bc %s.kernels.ll -o %t.ll.kernels.linked.bc
 ; RUN: clang -O3 -target nvptx %t.ll.kernels.linked.bc -S -o %s.nvptx.s
 ; RUN: llvm-link %t.ll %llvm_src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin
 ; ModuleID = 'gemm_opencl.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -137,6 +137,12 @@ declare i32 @printf(i8* nocapture, ...) #1
 %rtype = type {}
 %struct.arg = type <{ float*, i64, float*, i64, float*, i64, i32, i32, i32, %rtype }>
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
 
@@ -332,6 +338,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
   ; Setting up launch input args
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
 
   ; Store arguments
@@ -365,6 +372,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   ; Get the result
   %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
   %out = load %rtype* %out.addr
+  call void @llvm.visc.cleanup()
   ; -------------------------------- Completed VISC Launch Call --------------------------------
 
   %call14 = tail call i32 @checkResults(float* %0, float* %1, float* %2)
diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/Makefile b/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
index 525e0e680ffbf911a9941f39662dcff12591d1c0..b4a0a045507a80b3efe7f76b25ca8aa9bbde54ab 100644
--- a/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/lbm/Makefile
@@ -11,10 +11,18 @@ ifeq ($(TEST),)
   TEST = short
 endif
 
+# PLATFORM is appended to the build directory name; it falls back to
+# "default" when unset or empty.
+# NOTE: src/opencl_nvidia/main.c hard-codes build/opencl_nvidia_default/ as the
+# kernel location, so that path must be updated for any other PLATFORM value.
+ifeq ($(PLATFORM),)
+  PLATFORM = default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),long)
diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
index 074e5ca195383a19b7900df0779de80697b7d217..7894ea9b70ae0325fdd80afbe3bbd6673022067e 100644
--- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c
@@ -230,5 +230,5 @@ void OpenCL_initialize(OpenCL_Param* prm)
 
     //free((void*)clSource[0]);
 
-    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia_default/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel);
 }
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile b/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
index 23bf0063576bb7f2f4104bb87d32a252ad88071f..b81433c83df8645bb9166bead85dc9c259e5f524 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/Makefile
@@ -11,10 +11,18 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+# PLATFORM is appended to the build directory name; it falls back to
+# "default" when unset or empty.
+# NOTE: src/opencl_base/main.cc hard-codes build/opencl_base_default/ as the
+# kernel location, so that path must be updated for any other PLATFORM value.
+ifeq ($(PLATFORM),)
+  PLATFORM = default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
index 025a430321e5a470bc94d6d123cc806efbde0fa1..47ac5c4271f7bd2e858d1ef0c95b0f795c592d22 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc
@@ -125,7 +125,7 @@ int main (int argc, char *argv[]) {
   // cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
   cl_kernel clKernel;
   cl_program clProgram;
-  pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel);
+  pb_CreateAndBuildKernelFromBinary("build/opencl_base_default/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel);
   //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
   //CHECK_ERROR("clCreateProgramWithSource")
 
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/Makefile b/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
index 48ac1272af80d7aeafdc41c2a8ae1c6979d46d07..64c0083a0b2b2aaf21e7b0eb3a178e16d608e6da 100644
--- a/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/Makefile
@@ -11,10 +11,18 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+# PLATFORM is appended to the build directory name; it falls back to
+# "default" when unset or empty.
+# NOTE: src/opencl_nvidia/main.c hard-codes build/opencl_nvidia_default/ as the
+# kernel location, so that path must be updated for any other PLATFORM value.
+ifeq ($(PLATFORM),)
+  PLATFORM = default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
index 62bee2cf96a29364fcea2243c263477599cd7e67..87088bf98c952455adfab5308ec264cb69b65b28 100644
--- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c
@@ -78,7 +78,7 @@ int main(int argc, char** argv) {
     //CHECK_ERROR("clCreateKernel")
     cl_kernel clKernel;
     cl_program clProgram;
-    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia_default/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel);
 
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
     //parameters declaration
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/Makefile b/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
index fc298d260706c776912729593196e203c3227ff6..01de9b51b2a90082a6193470e62ad767ae9f3613 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/Makefile
@@ -11,10 +11,18 @@ ifeq ($(TEST),)
   TEST = small
 endif
 
+# PLATFORM is appended to the build directory name; it falls back to
+# "default" when unset or empty.
+# NOTE: src/opencl_base/main.c hard-codes build/opencl_base_default/ as the
+# kernel location, so that path must be updated for any other PLATFORM value.
+ifeq ($(PLATFORM),)
+  PLATFORM = default
+endif
+
 BIN = $(addsuffix -$(VERSION), $(APP))
 
 SRCDIR = src/$(VERSION)
-BUILDDIR = build/$(VERSION)
+BUILDDIR = build/$(VERSION)_$(PLATFORM)
 DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
 
 ifeq ($(TEST),small)
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
index 92584fa10fa62bd475ae4f16996d25397a72fc56..5238110baecfa7b44cc5d623cc73455c89a04254 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
@@ -106,7 +106,7 @@ int main(int argc, char** argv) {
     cl_program clProgram;
     cl_kernel clKernel;
 
-    pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
+    pb_CreateAndBuildKernelFromBinary("build/opencl_base_default/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
     //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
     //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
     //CHECK_ERROR("clCreateProgramWithSource")
diff --git a/llvm/test/VISC/parboil/common/mk/opencl.mk b/llvm/test/VISC/parboil/common/mk/opencl.mk
index 5d476be3f725fc89fcd4668d677730f7aedfca41..7c3ed00d10931f361eca7b6f1ef15294b38cd660 100644
--- a/llvm/test/VISC/parboil/common/mk/opencl.mk
+++ b/llvm/test/VISC/parboil/common/mk/opencl.mk
@@ -10,6 +10,13 @@ CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS)
 CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS)
 LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS)
 
+# Toolchain for offline kernel compilation: clang and llvm-link come from the
+# Release+Asserts tree under LLVM_SRC_ROOT; libclc is its sibling directory.
+LLVM_INSTALL:=$(LLVM_SRC_ROOT)/Release+Asserts
+LIBCLC:=$(LLVM_SRC_ROOT)/../libclc
+LLVM_CC:=$(LLVM_INSTALL)/bin/clang
+LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link
+
 # Rules common to all makefiles
 
 ########################################
@@ -39,6 +46,7 @@ $(error $$BUILDDIR is not set correctly)
 endif
 
 .PHONY: run
+.PRECIOUS: $(BUILDDIR)/%.ll
 
 ifeq ($(OPENCL_PATH),)
 FAILSAFE=no_opencl
@@ -51,6 +59,7 @@ endif
 ########################################
 
 OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
+KERNEL = $(call INBUILDDIR,$(KERNEL_OBJS))
 
 ifeq ($(DEBUGGER),)
 DEBUGGER=gdb
@@ -60,7 +69,7 @@ endif
 # Rules
 ########################################
 
-default: $(FAILSAFE) $(BUILDDIR) $(BIN)
+default: $(FAILSAFE) $(BUILDDIR) $(BIN) $(KERNEL)
 
 run : $(RUNDIR)
 	echo "Resolving OpenCL library..."
@@ -78,7 +87,7 @@ clean :
 	if [ -a $(BIN) ]; then rm $(BIN); fi
 	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
 
-$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o
+$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o
 	$(CXX) $^ -o $@ $(LDFLAGS)
 
 $(RUNDIR) :
@@ -99,6 +108,23 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.cc
 $(BUILDDIR)/%.o : $(SRCDIR)/%.cpp
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+# Lower the linked kernel bitcode to NVPTX assembly.
+$(BUILDDIR)/%.nvptx.s : $(BUILDDIR)/%.linked.bc
+	$(LLVM_CC) -O3 -target nvptx $< -S -o $@
+
+# Link the kernel IR with the prebuilt libclc bitcode library.
+$(BUILDDIR)/%.linked.bc : $(BUILDDIR)/%.ll
+	$(LLVM_LINK) $(LIBCLC)/built_libs/nvptx--nvidiacl.bc $< -o $@
+
+# Compile the OpenCL source to LLVM IR, then strip " addrspace(N)" qualifiers.
+# Both steps work on $@.tmp, which is renamed only once sed has succeeded, so
+# a failed or interrupted build cannot leave an unstripped .ll that make
+# would treat as up to date.
+$(BUILDDIR)/%.ll : $(SRCDIR)/%.cl
+	$(LLVM_CC) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/generic/include -include clc/clc.h -target nvptx--nvidiacl $< -O3 -emit-llvm -S -o $@.tmp
+	sed -e "s/ addrspace(.)//g" -i $@.tmp
+	mv -f $@.tmp $@
+
 no_opencl:
 	@echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value."
 	@echo "You may use $(PLATFORM_MK) if you want a platform specific configurations."
diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk
index 8e14b9cb57ca4b2b0ece54a0d97222261b22a323..44452605322f1a95f06a9d42ddb3c71f03b45bb3 100644
--- a/llvm/test/VISC/parboil/common/mk/visc.mk
+++ b/llvm/test/VISC/parboil/common/mk/visc.mk
@@ -123,7 +123,7 @@ clean :
 	rm -rf $(BUILDDIR)/*
 	if [ -a $(BIN) ]; then rm $(BIN); fi
 	if [ -a DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
+	if [ -d $(BUILDDIR) ]; then rm -rf $(BUILDDIR); fi
 
 #$(APP_BINS) : $(PTX_ASSEMBLY) $(BIN)
 #	echo Generating $(APP_BINS) ...