Removing paper and cfar-report

c62c1e51 · Akash Kothari · 4effc383 · 4effc383 · 4effc383 · 4effc383
Commit c62c1e51 authored 5 years ago by Akash Kothari
--- a/hpvm/cfar-report/Code/main.cc
+++ b/hpvm/cfar-report/Code/main.cc
-/***************************************************************************
- *cr
- *cr            (C) Copyright 2010 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/* 
- * Main entry of the host driver for the matrix addition (matAdd) kernel
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <malloc.h>
-#include <vector>
-#include <iostream>
-#include <cassert>
-#include <CL/cl.h>
-#include <parboil.h>
-
-// I/O routines
-extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, std::vector<float>&v);
-extern bool writeColMajorMatrixFile(const char *fn, int, int, std::vector<float>&);
-extern char* readFile(const char*);
-
-// Parameters of tile sizes
-#define TILE_SZ 16
-
-#define CHECK_ERROR(errorMessage)           \
-  if(clStatus != CL_SUCCESS)                \
-  {                                         \
-     std::cout<< errorMessage <<": "<< clStatus <<" Error!\n";  \
-     std::cout<<"Line: "<<__LINE__<<"\n";   \
-     exit(1);                               \
-  }
-
-/*
- * Bind the kernel arguments, launch clKernel over a 1-D NDRange and wait
- * for the command queue to finish.
- *
- *   m, n            matrix dimensions, passed to the kernel as args 3 and 4
- *   A, B, C         device buffers, passed to the kernel as args 0, 1 and 2
- *   clKernel        kernel object to launch
- *   clCommandQueue  queue the launch is enqueued on
- *
- * The global work size is m when ROWM is defined and n otherwise; the
- * work-group size is left to the OpenCL runtime (local size is NULL).
- * Any OpenCL failure is reported through CHECK_ERROR, which exits.
- */
-void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue )
-{
-  // In this code we assume the matrix sizes are multiple of tile size.
-  // This is a diagnostic only: execution continues after the message.
-  if ((m%TILE_SZ) || (n%TILE_SZ)) {
-    std::cerr << "unsupported size of matrix. m should be multiple of " << TILE_SZ
-      << "; n should be multiple of " << TILE_SZ << std::endl;
-  }
-
-#ifdef ROWM
-  size_t dg = m;
-#else
-  size_t dg = n;
-#endif
-
-  // Check every clSetKernelArg result individually: clStatus is overwritten
-  // by each call, so a single check at the end would only see the last one.
-  cl_int clStatus;
-  clStatus = clSetKernelArg(clKernel,0,sizeof(cl_mem),(void*)&A);
-  CHECK_ERROR("clSetKernelArg")
-  clStatus = clSetKernelArg(clKernel,1,sizeof(cl_mem),(void*)&B);
-  CHECK_ERROR("clSetKernelArg")
-  clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&C);
-  CHECK_ERROR("clSetKernelArg")
-  clStatus = clSetKernelArg(clKernel,3,sizeof(int),(void*)&m);
-  CHECK_ERROR("clSetKernelArg")
-  clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&n);
-  CHECK_ERROR("clSetKernelArg")
-
-  clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,NULL,0,NULL,NULL);
-  CHECK_ERROR("clEnqueueNDRangeKernel")
-
-  // Block until the kernel has completed so callers can time the launch.
-  clStatus = clFinish(clCommandQueue);
-  CHECK_ERROR("clFinish")
-}
-
-/*
- * Program entry point.
- *
- * Reads two column-major matrix files named on the command line, uploads
- * them to the GPU, runs the "matAdd" kernel through basicSgemm(), reads the
- * result back, optionally writes it to the output file and prints timing
- * information.
- *
- * Returns 0 on success. Exits with -1 on bad arguments or unusable input
- * data, and with 1 (via CHECK_ERROR) on an OpenCL failure.
- */
-int main (int argc, char *argv[]) {
-
-  struct pb_Parameters *params;
-  struct pb_TimerSet timers;
-
-  size_t A_sz, B_sz, C_sz;
-  // Zero-initialised so the validation below never reads an indeterminate value.
-  int matArow = 0, matAcol = 0;
-  int matBrow = 0, matBcol = 0;
-  std::vector<float> matA, matB;
-
-
-  /* Read command line. Expect exactly 2 inputs: A and B
-     in column-major layout */
-  params = pb_ReadParameters(&argc, argv);
-  if ((params->inpFiles[0] == NULL)
-      || (params->inpFiles[1] == NULL)
-      || (params->inpFiles[2] != NULL))
-    {
-      fprintf(stderr, "Expecting two input filenames\n");
-      exit(-1);
-    }
-
-  /* Read in data */
-  // load A
-  readColMajorMatrixFile(params->inpFiles[0],
-      matArow, matAcol, matA);
-  // load B
-  readColMajorMatrixFile(params->inpFiles[1],
-      matBrow, matBcol, matB);
-
-  // Validate at run time rather than with assert(), which is compiled out
-  // when NDEBUG is defined.
-  if (matArow <= 0 || matAcol <= 0 || matBrow <= 0 || matBcol <= 0)
-    {
-      fprintf(stderr, "Failed to read input matrices (empty or invalid dimensions)\n");
-      exit(-1);
-    }
-  if (matArow != matBrow || matAcol != matBcol)
-    {
-      fprintf(stderr, "Dimensions of two input matrices should match\n");
-      exit(-1);
-    }
-
-  // Element counts are computed in size_t so that rows*cols cannot overflow int.
-  const size_t A_elems = static_cast<size_t>(matArow) * static_cast<size_t>(matAcol);
-  const size_t B_elems = static_cast<size_t>(matBrow) * static_cast<size_t>(matBcol);
-  const size_t C_elems = static_cast<size_t>(matArow) * static_cast<size_t>(matBcol);
-
-  // The host-to-device copies below read A_sz / B_sz bytes from these vectors,
-  // so they must hold at least that many elements.
-  if (matA.size() < A_elems || matB.size() < B_elems)
-    {
-      fprintf(stderr, "Input matrix data is smaller than its declared dimensions\n");
-      exit(-1);
-    }
-
-  pb_InitializeTimerSet(&timers);
-
-  pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
-  cl_int clStatus;
-  cl_platform_id clPlatform;
-  clStatus = clGetPlatformIDs(1,&clPlatform,NULL);
-  CHECK_ERROR("clGetPlatformIDs")
-
-  cl_context_properties clCps[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties)clPlatform,0};
-  cl_context clContext = clCreateContextFromType(clCps,CL_DEVICE_TYPE_GPU,NULL,NULL,&clStatus);
-  CHECK_ERROR("clCreateContextFromType")
-
-  cl_device_id clDevice;
-  clStatus = clGetDeviceIDs(clPlatform,CL_DEVICE_TYPE_GPU,1,&clDevice,NULL);
-  CHECK_ERROR("clGetDeviceIDs")
-
-  cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
-  CHECK_ERROR("clCreateCommandQueue")
-
-  pb_SetOpenCL(&clContext, &clCommandQueue);
-
-  // The kernel is loaded from a prebuilt binary rather than compiled from source.
-  cl_kernel clKernel;
-  cl_program clProgram;
-  pb_CreateAndBuildKernelFromBinary("build/opencl_default/kernel_offline.nvptx.s", "matAdd", &clContext, &clDevice, &clProgram, &clKernel);
-
-  pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE );
-  // Buffer sizes in bytes
-  A_sz = A_elems*sizeof(float);
-  B_sz = B_elems*sizeof(float);
-  C_sz = C_elems*sizeof(float);
-
-  // Host-side storage for the result
-  std::vector<float> matC(C_elems);
-
-  pb_SwitchToTimer( &timers, pb_TimerID_COPY );
-  cl_mem dA = clCreateBuffer(clContext,CL_MEM_READ_ONLY,A_sz,NULL,&clStatus);
-  CHECK_ERROR("clCreateBuffer")
-  cl_mem dB = clCreateBuffer(clContext,CL_MEM_READ_ONLY,B_sz,NULL,&clStatus);
-  CHECK_ERROR("clCreateBuffer")
-  cl_mem dC = clCreateBuffer(clContext,CL_MEM_WRITE_ONLY,C_sz,NULL,&clStatus);
-  CHECK_ERROR("clCreateBuffer")
-
-  // Copy A and B into device memory (non-blocking)
-  clStatus = clEnqueueWriteBuffer(clCommandQueue,dA,CL_FALSE,0,A_sz,&matA.front(),0,NULL,NULL);
-  CHECK_ERROR("clEnqueueWriteBuffer")
-  clStatus = clEnqueueWriteBuffer(clCommandQueue,dB,CL_FALSE,0,B_sz,&matB.front(),0,NULL,NULL);
-  CHECK_ERROR("clEnqueueWriteBuffer")
-
-  // Blocking write of the (value-initialised) result buffer.
-  clStatus = clEnqueueWriteBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL);
-  CHECK_ERROR("clEnqueueWriteBuffer")
-
-  pb_SwitchToTimer( &timers, pb_TimerID_KERNEL );
-
-  // Use standard sgemm interface
-  basicSgemm(matArow, matAcol, dA, dB, dC, clKernel, clCommandQueue);
-
-  pb_SwitchToTimer( &timers, pb_TimerID_COPY );
-  // A failed read-back would otherwise leave matC silently unchanged.
-  clStatus = clEnqueueReadBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL);
-  CHECK_ERROR("clEnqueueReadBuffer")
-
-  // Release failures are deliberately non-fatal so the result can still be
-  // written out below.
-  pb_SwitchToTimer( &timers, visc_TimerID_SETUP);
-  clStatus = clReleaseKernel(clKernel);
-  clStatus = clReleaseProgram(clProgram);
-  clStatus = clReleaseMemObject(dA);
-  clStatus = clReleaseMemObject(dB);
-  clStatus = clReleaseMemObject(dC);
-  clStatus = clReleaseCommandQueue(clCommandQueue);
-  clStatus = clReleaseContext(clContext);
-
-  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-  pb_PrintTimerSet(&timers);
-
-  if (params->outFile) {
-
-    /* Write C to file */
-    writeColMajorMatrixFile(params->outFile,
-        matArow, matBcol, matC);
-  }
-
-
-  // NOTE(review): 2*m*n*k is the flop count of a matrix multiply, while the
-  // kernel launched above is "matAdd" - confirm the intended metric.
-  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
-  std::cout<< "GFLOPs = " << 2.* matArow * matBcol * matAcol/GPUtime/1e9 << std::endl;
-  pb_FreeParameters(params);
-
-  return 0;
-}
--- a/hpvm/cfar-report/Code/vecAddC.cl
+++ b/hpvm/cfar-report/Code/vecAddC.cl
-/*
- * Element-wise addition C = A + B over m*n floats.
- * Each work-item takes j from its global id in dimension 0 and visits
- * the m elements at offsets i*n + j, i.e. a stride-n walk starting at j.
- */
-__kernel void matAdd( __global float *A, __global float *B, __global float* C, int m, int n ) {
-    const int col = get_global_id(0);
-    int row = 0;
-    while (row < m) {
-        const int offset = row * n + col;
-        C[offset] = A[offset] + B[offset];
-        ++row;
-    }
-}
-
-/*
- * Binds A, B, C, m and n to kernel arguments 0..4, launches clKernel over a
- * 1-D range of n work-items and blocks until the queue has finished.
- * Return codes of the OpenCL calls are not inspected.
- */
-void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue )
-{
-  // One work-item per value of n; work-group size is left to the runtime.
-  const size_t numWorkItems = n;
-
-  cl_uint argIndex = 0;
-  clSetKernelArg(clKernel, argIndex++, sizeof(A), &A);
-  clSetKernelArg(clKernel, argIndex++, sizeof(B), &B);
-  clSetKernelArg(clKernel, argIndex++, sizeof(C), &C);
-  clSetKernelArg(clKernel, argIndex++, sizeof(m), &m);
-  clSetKernelArg(clKernel, argIndex++, sizeof(n), &n);
-
-  clEnqueueNDRangeKernel(clCommandQueue, clKernel,
-                         1,              // work dimensions
-                         NULL,           // global offset
-                         &numWorkItems,  // global size
-                         NULL,           // local size
-                         0, NULL, NULL); // no wait list, no event
-
-  clFinish(clCommandQueue);
-}
--- a/hpvm/cfar-report/Code/vecAddR.cl
+++ b/hpvm/cfar-report/Code/vecAddR.cl
-/*
- * Element-wise addition C = A + B over m*n floats.
- * Each work-item takes i from its global id in dimension 0 and visits
- * the n consecutive elements at offsets i*n + j for j = 0..n-1.
- */
-__kernel void matAdd( __global float *A, __global float *B, __global float* C, int m, int n ) {
-    const int row = get_global_id(0);
-    int col = 0;
-    while (col < n) {
-        const int offset = row * n + col;
-        C[offset] = A[offset] + B[offset];
-        ++col;
-    }
-}
-
-/*
- * Binds A, B, C, m and n to kernel arguments 0..4, launches clKernel over a
- * 1-D range of m work-items and blocks until the queue has finished.
- * Return codes of the OpenCL calls are not inspected.
- */
-void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue )
-{
-  // One work-item per value of m; work-group size is left to the runtime.
-  const size_t numWorkItems = m;
-
-  cl_uint argIndex = 0;
-  clSetKernelArg(clKernel, argIndex++, sizeof(A), &A);
-  clSetKernelArg(clKernel, argIndex++, sizeof(B), &B);
-  clSetKernelArg(clKernel, argIndex++, sizeof(C), &C);
-  clSetKernelArg(clKernel, argIndex++, sizeof(m), &m);
-  clSetKernelArg(clKernel, argIndex++, sizeof(n), &n);
-
-  clEnqueueNDRangeKernel(clCommandQueue, clKernel,
-                         1,              // work dimensions
-                         NULL,           // global offset
-                         &numWorkItems,  // global size
-                         NULL,           // local size
-                         0, NULL, NULL); // no wait list, no event
-
-  clFinish(clCommandQueue);
-}
--- a/hpvm/cfar-report/Figures/RAR PGM v01.docx
+++ b/hpvm/cfar-report/Figures/RAR PGM v01.docx
--- a/hpvm/cfar-report/Figures/RARpic.jpg
+++ b/hpvm/cfar-report/Figures/RARpic.jpg
--- a/hpvm/cfar-report/Figures/RARpic.pptx
+++ b/hpvm/cfar-report/Figures/RARpic.pptx
--- a/hpvm/cfar-report/Figures/Results.pdf
+++ b/hpvm/cfar-report/Figures/Results.pdf
--- a/hpvm/cfar-report/Figures/compilationFlow.png
+++ b/hpvm/cfar-report/Figures/compilationFlow.png
--- a/hpvm/cfar-report/Figures/fig-x1.pdf
+++ b/hpvm/cfar-report/Figures/fig-x1.pdf
--- a/hpvm/cfar-report/Figures/fig-x2.pdf
+++ b/hpvm/cfar-report/Figures/fig-x2.pdf
--- a/hpvm/cfar-report/Figures/fig-x3.pdf
+++ b/hpvm/cfar-report/Figures/fig-x3.pdf
--- a/hpvm/cfar-report/Figures/gpu-fpga-cmp.eps
+++ b/hpvm/cfar-report/Figures/gpu-fpga-cmp.eps
--- a/hpvm/cfar-report/Figures/gpu-fpga-cmp.jpg
+++ b/hpvm/cfar-report/Figures/gpu-fpga-cmp.jpg
--- a/hpvm/cfar-report/Figures/gpu-fpga-cmp.pdf
+++ b/hpvm/cfar-report/Figures/gpu-fpga-cmp.pdf
--- a/hpvm/cfar-report/Figures/gpuMapping.pdf
+++ b/hpvm/cfar-report/Figures/gpuMapping.pdf
--- a/hpvm/cfar-report/Figures/mapsExample.pdf
+++ b/hpvm/cfar-report/Figures/mapsExample.pdf
--- a/hpvm/cfar-report/Figures/mapsExampleD.pdf
+++ b/hpvm/cfar-report/Figures/mapsExampleD.pdf
--- a/hpvm/cfar-report/Figures/mapsExampleS.pdf
+++ b/hpvm/cfar-report/Figures/mapsExampleS.pdf
--- a/hpvm/cfar-report/Figures/mem-stall-improved.pdf
+++ b/hpvm/cfar-report/Figures/mem-stall-improved.pdf
--- a/hpvm/cfar-report/Figures/memoryModel.pdf
+++ b/hpvm/cfar-report/Figures/memoryModel.pdf