Skip to content
Snippets Groups Projects
Commit a5a423fc authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

(1) visc-rt.cpp modified to use the correct OpenCL read/write flags, depending on the in/out attributes of the arguments
(2) Checkpoint commit in preparation for the mri-q benchmark
parent 78714353
No related branches found
No related tags found
No related merge requests found
...@@ -113,9 +113,9 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size ...@@ -113,9 +113,9 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size
cl_int errcode; cl_int errcode;
cl_mem_flags clFlags; cl_mem_flags clFlags;
if(isInput && isOutput) clFlags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; if(isInput && isOutput) clFlags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
else if(isInput) clFlags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; else if(isInput) clFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
else if(isOutput) clFlags = CL_MEM_READ_WRITE; else if(isOutput) clFlags = CL_MEM_WRITE_ONLY;
else clFlags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; else clFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
cl_mem d_input = clCreateBuffer(Context->clGPUContext, clFlags, size, input, &errcode); cl_mem d_input = clCreateBuffer(Context->clGPUContext, clFlags, size, input, &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device"); checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
......
#include "macros.h" #include "macros.h"
#define NC 4 #define NC 4
#define KERNEL_Q_K_ELEMS_PER_GRID 256
#define COARSE_GENERAL #define COARSE_GENERAL
// #define COARSE_SPEC NC // #define COARSE_SPEC NC
......
#ifndef __MACROS__ #ifndef __MACROS__
#define __MACROS__ //#define __MACROS__
#define PI 3.1415926535897932384626433832795029f #define PI 3.1415926535897932384626433832795029f
#define PIx2 6.2831853071795864769252867665590058f #define PIx2 6.2831853071795864769252867665590058f
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 /* 512 */ #define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 /* 512 */
#define KERNEL_Q_THREADS_PER_BLOCK 256 #define KERNEL_Q_THREADS_PER_BLOCK 256
#define KERNEL_Q_K_ELEMS_PER_GRID 1024 #define KERNEL_Q_K_ELEMS_PER_GRID 256
struct kValues { struct kValues {
float Kx; float Kx;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment