Skip to content
Snippets Groups Projects
Commit 95d31417 authored by Maria Kotsifakou's avatar Maria Kotsifakou
Browse files

VISC version of SPMV parboil benchmark.

parent b9a8b0b0
No related branches found
No related tags found
No related merge requests found
PARBOIL_ROOT = /home/kotsifa2/visc/llvm/test/VISC/parboil
APP = spmv
# Default compile visc
ifeq ($(VERSION),)
VERSION = visc
endif
# Default use small test case
ifeq ($(TEST),)
TEST = small
endif
BIN = $(addsuffix -$(VERSION), $(APP))
SRCDIR = src/$(VERSION)
BUILDDIR = build/$(VERSION)
DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
ifeq ($(TEST),small)
INPUT1 = $(DATASET_DIR)/small/input/1138_bus.mtx
INPUT2 = $(DATASET_DIR)/small/input/vector.bin
REF_OUTPUT = $(DATASET_DIR)/small/output/1138_bus.mtx.out
RUNDIR = run/$(VERSION)/small
OUTPUT = $(RUNDIR)/1138_bus.mtx.out
else ifeq ($(TEST),medium)
INPUT1 = $(DATASET_DIR)/medium/input/bcsstk18.mtx
INPUT2 = $(DATASET_DIR)/medium/input/vector.bin
REF_OUTPUT = $(DATASET_DIR)/medium/output/bcsstk18.mtx.out
RUNDIR = run/$(VERSION)/medium
OUTPUT = $(RUNDIR)/bcsstk18.mtx.out
else
INPUT1 = $(DATASET_DIR)/large/input/Dubcova3.mtx.bin
INPUT2 = $(DATASET_DIR)/large/input/vector.bin
REF_OUTPUT = $(DATASET_DIR)/large/output/Dubcova3.mtx.out
RUNDIR = run/$(VERSION)/large
OUTPUT = $(RUNDIR)/Dubcova3.mtx.out
endif
ARGS = -i $(INPUT1) $(INPUT2) -o $(OUTPUT)
TOOL = tools/compare-output
include $(PARBOIL_ROOT)/common/mk/Makefile
# (c) 2010 The Board of Trustees of the University of Illinois.
LANGUAGE=visc
TOOLS_SRC=common_src/convert-dataset
SRCDIR_OBJS=main.visc.ll gpu_info.ll file.ll
APP_CUDALDFLAGS=-lm
APP_CFLAGS=-ffast-math -g3 -O3 -I$(TOOLS_SRC)
APP_CXXFLAGS=-ffast-math -g3 -O3
include $(TOOLS_SRC)/commontools.mk
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#if __BYTE_ORDER != __LITTLE_ENDIAN
# error "File I/O is not implemented for this system: wrong endianness."
#endif
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt)
{
FILE* fid = fopen(fName, "rb");
if (fid == NULL)
{
fprintf(stderr, "Cannot open input file\n");
exit(-1);
}
fscanf(fid, "%d %d %d %d %d\n",len,depth,nzcnt_len,dim,pad);
int _len=len[0];
int _depth=depth[0];
int _dim=dim[0];
int _pad=pad[0];
int _nzcnt_len=nzcnt_len[0];
*h_data = (float *) malloc(_len * sizeof (float));
fread (*h_data, sizeof (float), _len, fid);
*h_indices = (int *) malloc(_len * sizeof (int));
fread (*h_indices, sizeof (int), _len, fid);
*h_ptr = (int *) malloc(_depth * sizeof (int));
fread (*h_ptr, sizeof (int), _depth, fid);
*h_perm = (int *) malloc(_dim * sizeof (int));
fread (*h_perm, sizeof (int), _dim, fid);
*h_nzcnt = (int *) malloc(_nzcnt_len * sizeof (int));
fread (*h_nzcnt, sizeof (int), _nzcnt_len, fid);
fclose (fid);
}
void input_vec(char *fName,float *h_vec,int dim)
{
FILE* fid = fopen(fName, "rb");
fread (h_vec, sizeof (float), dim, fid);
fclose(fid);
}
void outputData(char* fName, float *h_Ax_vector,int dim)
{
FILE* fid = fopen(fName, "w");
uint32_t tmp32;
if (fid == NULL)
{
fprintf(stderr, "Cannot open output file\n");
exit(-1);
}
tmp32 = dim;
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
fwrite(h_Ax_vector, sizeof(float), dim, fid);
fclose (fid);
}
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __FILEH__
#define __FILEH__
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt);
void input_vec(char* fNanme, float *h_vec,int dim);
void outputData(char* fName, float *h_Ax_vector,int dim);
#endif
\ No newline at end of file
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#include "gpu_info.h"
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm)
{
int max_thread;
int max_block=8;
if(major==1)
{
if(minor>=2)
max_thread=1024;
else
max_thread=768;
}
else if(major==2)
max_thread=1536;
else
//newer GPU //keep using 2.0
max_thread=1536;
int _grid;
int _thread;
if(task*pad>sm*max_thread)
{
_thread=max_thread/max_block;
_grid = ((task*pad+_thread-1)/_thread)*_thread;
}
else
{
_thread=pad;
_grid=task*pad;
}
thread[0]=_thread;
grid[0]=_grid;
}
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __GPUINFOH__
#define __GPUINFOH__
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm);
#endif
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#define WARP_BITS 5
__kernel void spmv_jds(__global float *dst_vector, __global float *d_data,
__global int *d_index, __global int *d_perm,
__global float *x_vec, const int dim,
__constant int *jds_ptr_int,
__constant int *sh_zcnt_int)
{
int ix = get_global_id(0);
int warp_id=ix>>WARP_BITS;
if(ix<dim)
{
float sum=0.0f;
int bound=sh_zcnt_int[warp_id];
//prefetch 0
int j=jds_ptr_int[0]+ix;
float d = d_data[j];
int i = d_index[j];
float t = x_vec[i];
if (bound>1) //bound >=2
{
//prefetch 1
j=jds_ptr_int[1]+ix;
i = d_index[j];
int in;
float dn;
float tn;
for(int k=2;k<bound;k++ )
{
//prefetch k-1
dn = d_data[j];
//prefetch k
j=jds_ptr_int[k]+ix;
in = d_index[j];
//prefetch k-1
tn = x_vec[i];
//compute k-2
sum += d*t;
//sweep to k
i = in;
//sweep to k-1
d = dn;
t =tn;
}
//fetch last
dn = d_data[j];
tn = x_vec[i];
//compute last-1
sum += d*t;
//sweep to last
d=dn;
t=tn;
}
//compute last
sum += d*t; // 3 3
//write out data
dst_vector[d_perm[ix]]=sum;
}
}
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <CL/cl.h>
//#include <CL/cl_ext.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <parboil.h>
#include "file.h"
#include "gpu_info.h"
#include "convert_dataset.h"
extern "C" void __visc__attributes(...);
extern "C" void __visc__wait(...);
extern "C" unsigned __visc__node(...);
extern "C" int get_global_id(int);
static int generate_vector(float *x_vector, int dim)
{
srand(54321);
int i;
for(i=0;i<dim;i++)
{
x_vector[i] = (rand() / (float) RAND_MAX);
}
return 0;
}
__kernel void spmv_jds(float *dst_vector, float *d_data,
int *d_index, int *d_perm,
float *x_vec, int dim,
int *jds_ptr_int,
int *sh_zcnt_int)
{
__visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int,
1, dst_vector);
int ix = get_global_id(0);
int warp_id=ix>>WARP_BITS;
if(ix<dim)
{
float sum=0.0f;
int bound=sh_zcnt_int[warp_id];
//prefetch 0
int j=jds_ptr_int[0]+ix;
float d = d_data[j];
int i = d_index[j];
float t = x_vec[i];
if (bound>1) //bound >=2
{
//prefetch 1
j=jds_ptr_int[1]+ix;
i = d_index[j];
int in;
float dn;
float tn;
for(int k=2;k<bound;k++ )
{
//prefetch k-1
dn = d_data[j];
//prefetch k
j=jds_ptr_int[k]+ix;
in = d_index[j];
//prefetch k-1
tn = x_vec[i];
//compute k-2
sum += d*t;
//sweep to k
i = in;
//sweep to k-1
d = dn;
t =tn;
}
//fetch last
dn = d_data[j];
tn = x_vec[i];
//compute last-1
sum += d*t;
//sweep to last
d=dn;
t=tn;
}
//compute last
sum += d*t; // 3 3
//write out data
dst_vector[d_perm[ix]]=sum;
}
}
int main(int argc, char** argv) {
struct pb_TimerSet timers;
struct pb_Parameters *parameters;
printf("OpenCL accelerated sparse matrix vector multiplication****\n");
printf("Li-Wen Chang <lchang20@illinois.edu> and Shengzhao Wu<wu14@illinois.edu>\n");
parameters = pb_ReadParameters(&argc, argv);
if ((parameters->inpFiles[0] == NULL) || (parameters->inpFiles[1] == NULL))
{
fprintf(stderr, "Expecting one two filenames\n");
exit(-1);
}
pb_InitializeTimerSet(&timers);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
//parameters declaration
int len;
int depth;
int dim;
int pad=32;
int nzcnt_len;
//host memory allocation
//matrix
float *h_data;
int *h_indices;
int *h_ptr;
int *h_perm;
int *h_nzcnt;
//vector
float *h_Ax_vector;
float *h_x_vector;
//load matrix from files
pb_SwitchToTimer(&timers, pb_TimerID_IO);
//inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad,
// &h_data, &h_indices, &h_ptr,
// &h_perm, &h_nzcnt);
int col_count;
coo_to_jds(
parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx
1, // row padding
pad, // warp size
1, // pack size
1, // is mirrored?
0, // binary matrix
1, // debug level [0:2]
&h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm,
&col_count, &dim, &len, &nzcnt_len, &depth
);
h_Ax_vector=(float*)malloc(sizeof(float)*dim);
h_x_vector=(float*)malloc(sizeof(float)*dim);
input_vec( parameters->inpFiles[1],h_x_vector,dim);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
memset(h_Ax_vector, 0, dim*sizeof(float));
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
size_t grid;
size_t block;
compute_active_thread(&block, &grid, nzcnt_len, pad, 3, 0, 8);
//main execution
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
int i;
for(i=0; i<50; i++)
{
unsigned spmvDFG = __visc__node(spmv_jds, 2, 1, block, grid,
15,
h_Ax_vector,
dim*sizeof(float),
h_data,
len*sizeof(float),
h_indices,
len*sizeof(int),
h_perm,
dim*sizeof(int),
h_x_vector,
dim*sizeof(float),
dim,
h_ptr,
depth*sizeof(int),
h_nzcnt,
nzcnt_len*sizeof(int),
0);
__visc__wait(spmvDFG);
/******************************* Issues *******************************
* 1. Using OpenCL to compute grid and block dimensions
* (getting device info)
* We need to check the GPU version (major number) where this kernel
* executes to compare against opencl_nvidia version
* 2. Type of cl_mem buffer for d_x_vector is created with size of float,
but copied in through size of int.
Due to type of h_x_vector, I chose to use float
* (Minor)
* 3. Kernel initially used constant memory for last two arguments - removed
*/
}
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
//HtoD memory copy
if (parameters->outFile) {
pb_SwitchToTimer(&timers, pb_TimerID_IO);
outputData(parameters->outFile,h_Ax_vector,dim);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
free (h_data);
free (h_indices);
free (h_ptr);
free (h_perm);
free (h_nzcnt);
free (h_Ax_vector);
free (h_x_vector);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(parameters);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment