Skip to content
Snippets Groups Projects
Commit 8cc8470c authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Commiting the pipeline version of pldi

parent a403ac3a
No related branches found
No related tags found
No related merge requests found
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/*
* Main entry of the edge-detection pipeline: a non-linear Laplacian estimate
* of the input image followed by a zero-crossing computation
*/
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <malloc.h>
#include <vector>
#include <iostream>
#include <cassert>
#include <parboil.h>
#include <visc.h>
#include <pthread.h>
// I/O routines (only declared here; the definitions live outside this file)
// Reads the matrix stored in file `fn`. The dimensions and the element vector
// are taken by non-const reference.
// NOTE(review): presumably nr_row / nr_col / v are outputs filled by the reader
// and the bool reports success - confirm against the definition.
extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, std::vector<float>&v);
// Writes a matrix to file `fn`; main passes (rows, columns, data) in that order.
// NOTE(review): meaning of the bool result is not visible here - confirm.
extern bool writeColMajorMatrixFile(const char *fn, int, int, std::vector<float>&);
// NOTE(review): not referenced anywhere in the visible part of this file.
extern char* readFile(const char*);
// Definitions of sizes for edge detection kernels
// Padding values substituted for window cells that fall outside the image:
// MIN_BR while computing the dilation, MAX_BR while computing the erosion.
#define MIN_BR 0.0f
#define MAX_BR 1.0f
// Side length of the square pixel window / structuring element.
// Code needs to be changed for this to vary
#define SZB 3
#define REDUCTION_TILE_SZ 1024
// Generic minimum / maximum of two values.
// The operands are compared in their own type. Comparing after a cast to int
// truncates every brightness value in [0, 1) to 0, so the comparison would
// (almost) never distinguish two float pixels.
// These are function-like macros: an argument may be evaluated twice, so do
// not pass expressions with side effects.
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
void SeqlaplacianEstimate(float *I, size_t bytesI,
float *B, size_t bytesB,
float *L, size_t bytesL,
int m, int n, int gx, int gy) {
// 3x3 image area
float imageArea[SZB][SZB];
int i, j;
if ((gx < n) && (gy < m)) {
//Data copy for dilation filter
imageArea[1][1] = I[gy * n + gx];
if (gx == 0) {
imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MIN_BR;
} else {
imageArea[1][0] = I[gy * n + gx - 1];
imageArea[0][0] = (gy > 0) ? I[(gy - 1) * n + gx - 1] : MIN_BR;
imageArea[2][0] = (gy < m - 1) ? I[(gy + 1) * n + gx - 1] : MIN_BR;
}
if (gx == n - 1) {
imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MIN_BR;
} else {
imageArea[1][2] = I[gy * n + gx + 1];
imageArea[0][2] = (gy > 0) ? I[(gy - 1) * n + gx + 1] : MIN_BR;
imageArea[2][2] = (gy < m - 1) ? I[(gy + 1) * n + gx + 1] : MIN_BR;
}
imageArea[0][1] = (gy > 0) ? I[(gy - 1) * n + gx] : MIN_BR;
imageArea[2][1] = (gy < m - 1) ? I[(gy + 1) * n + gx] : MIN_BR;
//Compute pixel of dilated image
float dilatedPixel = MIN_BR;
for (i = 0; i < SZB; i++)
for (j = 0; j < SZB; j++)
dilatedPixel = MAX(dilatedPixel, imageArea[i][j] * B[i*SZB + j]);
//Data copy for erotion filter - only change the boundary conditions
if (gx == 0) {
imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MAX_BR;
} else {
if (gy == 0) imageArea[0][0] = MAX_BR;
if (gy == m-1) imageArea[2][0] = MAX_BR;
}
if (gx == n - 1) {
imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MAX_BR;
} else {
if (gy == 0) imageArea[0][2] = MAX_BR;
if (gy == m-1) imageArea[2][2] = MAX_BR;
}
if (gy == 0) imageArea[0][1] = MAX_BR;
if (gy == m-1) imageArea[2][1] = MAX_BR;
//Compute pixel of eroded image
float erodedPixel = MAX_BR;
for (i = 0; i < SZB; i++)
for (j = 0; j < SZB; j++)
erodedPixel = MIN(erodedPixel, imageArea[i][j] * B[i*SZB + j]);
float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1][1];
L[gy*n+gx] = laplacian;
}
}
void SeqcomputeZeroCrossings(float *L, size_t bytesL,
float *B, size_t bytesB,
float *S, size_t bytesS,
int m, int n, int gx, int gy) {
// 3x3 image area
float imageArea[SZB][SZB];
int i, j;
//if(gx == 0 && gy == 0)
//std::cout << "Entered ZC\n";
if ((gx < n) && (gy < m)) {
// Data copy for dilation filter
imageArea[1][1] = L[gy * n + gx];
if (gx == 0) {
imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MIN_BR;
} else {
imageArea[1][0] = L[gy * n + gx - 1];
imageArea[0][0] = (gy > 0) ? L[(gy - 1) * n + gx - 1] : MIN_BR;
imageArea[2][0] = (gy < m - 1) ? L[(gy + 1) * n + gx - 1] : MIN_BR;
}
if (gx == n - 1) {
imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MIN_BR;
} else {
imageArea[1][2] = L[gy * n + gx + 1];
imageArea[0][2] = (gy > 0) ? L[(gy - 1) * n + gx + 1] : MIN_BR;
imageArea[2][2] = (gy < m - 1) ? L[(gy + 1) * n + gx + 1] : MIN_BR;
}
imageArea[0][1] = (gy > 0) ? L[(gy - 1) * n + gx] : MIN_BR;
imageArea[2][1] = (gy < m - 1) ? L[(gy + 1) * n + gx] : MIN_BR;
//Compute pixel of dilated image
float dilatedPixel = MIN_BR;
for (i = 0; i < SZB; i++)
for (j = 0; j < SZB; j++)
dilatedPixel = MAX(dilatedPixel, imageArea[i][j] * B[i*SZB + j]);
//Data copy for erotion filter - only change the boundary conditions
if (gx == 0) {
imageArea[0][0] = imageArea[1][0] = imageArea[2][0] = MAX_BR;
} else {
if (gy == 0) imageArea[0][0] = MAX_BR;
if (gy == m-1) imageArea[2][0] = MAX_BR;
}
if (gx == n - 1) {
imageArea[0][2] = imageArea[1][2] = imageArea[2][2] = MAX_BR;
} else {
if (gy == 0) imageArea[0][2] = MAX_BR;
if (gy == m-1) imageArea[2][2] = MAX_BR;
}
if (gy == 0) imageArea[0][1] = MAX_BR;
if (gy == m-1) imageArea[2][1] = MAX_BR;
//Compute pixel of eroded image
float erodedPixel = MAX_BR;
for (i = 0; i < SZB; i++)
for (j = 0; j < SZB; j++)
erodedPixel = MIN(erodedPixel, imageArea[i][j] * B[i*SZB + j]);
float pixelSign = dilatedPixel - erodedPixel;
S[gy*n+gx] = pixelSign;
}
//if(gx == n-1 && gy == n-1)
//std::cout << "Exit ZC\n";
}
extern "C" {
// Return type of every dataflow node function in this file.
// The leaf nodes return {bytesB, bytesL} (laplacianEstimate) and
// {bytesB, bytesS} (computeZeroCrossings); the wrapper and root nodes return {0}.
// Declared packed, i.e. without padding between the members.
// NOTE(review): presumably the runtime addresses the members by position
// (output 0 / output 1) - confirm before reordering or adding fields.
struct __attribute__((__packed__)) OutStruct {
//int m;
//int n;
size_t bytesB;   // size argument of the structuring element B, passed through
size_t bytesOut; // size argument of the buffer the node wrote (bytesL or bytesS)
};
// Argument bundle filled by packData and handed to __visc__launch / __visc__push
// in main. The members appear in the same order as the parameters of
// edgeDetection. Declared packed, i.e. without padding between the members.
// NOTE(review): presumably the runtime unpacks this by member position, so the
// order and the packing must stay in sync with edgeDetection - confirm.
struct __attribute__((__packed__)) InStruct {
float* I ;      // input image
size_t bytesI;  // size of I in bytes (main passes rows * cols * sizeof(float))
float* B;       // structuring element
size_t bytesB;  // size of B in bytes
float* L;       // Laplacian buffer (output of the first stage, input of the second)
size_t bytesL;  // size of L in bytes
float* S;       // final output buffer
size_t bytesS;  // size of S in bytes
int m;          // image dimension compared against gy in the kernels
int n;          // image dimension compared against gx in the kernels
};
//void* __visc__createNode2D(...);
//void* __visc__createNode1D(...);
//void* __visc__createNode(...);
//void __visc__bindIn(void*, unsigned, unsigned, unsigned);
//void __visc__bindOut(void*, unsigned, unsigned, unsigned);
//void* __visc__edge(void*, void*, unsigned, unsigned, unsigned);
//void __visc__push(void*, void*);
//void* __visc__pop(void*);
//void* __visc__launch(unsigned,...);
//void __visc__wait(void*);
//void* __visc__getNode();
//unsigned __visc__getNodeInstanceID_x(void*);
//unsigned __visc__getNodeInstanceID_y(void*);
void packData(struct InStruct* args, float* I, size_t bytesI,
float* B, size_t bytesB,
float* L, size_t bytesL,
float* S, size_t bytesS,
int m, int n) {
args->I = I;
args->bytesI = bytesI;
args->B = B;
args->bytesB = bytesB;
args->L = L;
args->bytesL = bytesL;
args->S = S;
args->bytesS = bytesS;
args->m = m;
args->n = n;
}
/* Compute a non-linear Laplacian estimate of input image I of size m x n */
/*
 I            : input image, read as I[gy * n + gx]
 m, n         : dimensions (gy is checked against m, gx against n)
 B            : structuring element for dilation - erosion ([0 1 0; 1 1 1; 0 1 0]),
                read as B[i * SZB + j]
 L            : output (Laplacian of the image), written as L[gy * n + gx]
 bytesI       : not used in the body
 bytesB/bytesL: only forwarded through the returned OutStruct
 Need 2D grid, a thread per pixel: gx / gy are the x / y instance IDs of this
 node. Instances with gx >= n or gy >= m write nothing.
 Returns {bytesB, bytesL}.
*/
OutStruct laplacianEstimate(float *I, size_t bytesI,
                            float *B, size_t bytesB,
                            float *L, size_t bytesL,
                            int m, int n) {
  //__visc__hint(visc::SPIR_TARGET);
  __visc__hint(visc::GPU_TARGET);
  __visc__attributes(2, I, B, 1, L);

  // Pixel coordinates of this instance (OpenCL: get_global_id(0) / get_global_id(1)).
  void* thisNode = __visc__getNode();
  int gx = __visc__getNodeInstanceID_x(thisNode);
  int gy = __visc__getNodeInstanceID_y(thisNode);

  if ((gx < n) && (gy < m)) {
    const int last = SZB - 1;
    float window[SZB][SZB];

    // Gather the neighbourhood of (gx, gy) for the dilation filter.
    // Cells lying outside the image are padded with MIN_BR.
    for (int r = 0; r < SZB; ++r) {
      const bool rowInside =
          (r == 0) ? (gy > 0) : ((r == last) ? (gy < m - 1) : true);
      for (int c = 0; c < SZB; ++c) {
        const bool colInside =
            (c == 0) ? (gx != 0) : ((c == last) ? (gx != n - 1) : true);
        if (rowInside && colInside)
          window[r][c] = I[(gy + r - 1) * n + (gx + c - 1)];
        else
          window[r][c] = MIN_BR;
      }
    }

    // Dilation: maximum of the window weighted by the structuring element.
    float dilated = MIN_BR;
    for (int r = 0; r < SZB; ++r)
      for (int c = 0; c < SZB; ++c)
        dilated = MAX(dilated, window[r][c] * B[r * SZB + c]);

    // Erosion: same window, but cells on the far side of an image border are
    // padded with MAX_BR instead; then take the weighted minimum.
    float eroded = MAX_BR;
    for (int r = 0; r < SZB; ++r) {
      const bool rowPadded = (r == 0 && gy == 0) || (r == last && gy == m - 1);
      for (int c = 0; c < SZB; ++c) {
        const bool colPadded = (c == 0 && gx == 0) || (c == last && gx == n - 1);
        const float sample = (rowPadded || colPadded) ? MAX_BR : window[r][c];
        eroded = MIN(eroded, sample * B[r * SZB + c]);
      }
    }

    // Laplacian estimate = dilation + erosion - 2 * centre pixel.
    float estimate = dilated + eroded - 2 * window[1][1];
    L[gy * n + gx] = estimate;
  }

  // Pass the buffer sizes on to whoever consumes this node's outputs.
  OutStruct output = {bytesB, bytesL};
  return output;
}
// Internal dataflow node wrapping the laplacianEstimate leaf node.
// Creates a 2D node over laplacianEstimate with (m, n) as dimension arguments
// and forwards this wrapper's eight arguments to the leaf one-to-one.
// NOTE(review): the bind calls appear to take (node, index on this wrapper,
// index on the child, streaming flag) - confirm against the __visc__bindIn /
// __visc__bindOut documentation.
// NOTE(review): the leaf compares its x instance ID against n and its y
// instance ID against m, while the dimensions are given here as (m, n). If the
// first dimension is the x extent, the whole image is only covered when
// m == n - confirm the argument order of __visc__createNode2D.
// The returned {0} looks like a placeholder; presumably the real outputs are
// the ones routed through __visc__bindOut.
OutStruct WrapperlaplacianEstimate(float *I, size_t bytesI,
float *B, size_t bytesB,
float *L, size_t bytesL,
int m, int n) {
//__visc__hint(visc::SPIR_TARGET);
__visc__hint(visc::GPU_TARGET);
__visc__attributes(2, I, B, 1, L);
void* LNode = __visc__createNode2D(laplacianEstimate, m, n);
__visc__bindIn(LNode, 0, 0, 0); // Bind I
__visc__bindIn(LNode, 1, 1, 0); // Bind bytesI
__visc__bindIn(LNode, 2, 2, 0); // Bind B
__visc__bindIn(LNode, 3, 3, 0); // Bind bytesB
__visc__bindIn(LNode, 4, 4, 0); // Bind L
__visc__bindIn(LNode, 5, 5, 0); // Bind bytesL
__visc__bindIn(LNode, 6, 6, 0); // Bind m
__visc__bindIn(LNode, 7, 7, 0); // Bind n
__visc__bindOut(LNode, 0, 0, 0); // Bind output 0 (presumably bytesB, first member the leaf returns)
__visc__bindOut(LNode, 1, 1, 0); // Bind output 1 (presumably bytesL, second member the leaf returns)
return {0};
}
/* Compute the zero crossings of input image L of size m x n */
/*
 L            : input image (computed Laplacian), read as L[gy * n + gx]
 m, n         : dimensions (gy is checked against m, gx against n)
 B            : structuring element for dilation - erosion ([0 1 0; 1 1 1; 0 1 0]),
                read as B[i * SZB + j]
 S            : output (sign of the image), written as S[gy * n + gx]
 bytesL       : not used in the body
 bytesB/bytesS: only forwarded through the returned OutStruct
 Need 2D grid, a thread per pixel: gx / gy are the x / y instance IDs of this
 node. Instances with gx >= n or gy >= m write nothing.
 Returns {bytesB, bytesS}.
*/
OutStruct computeZeroCrossings(float *L, size_t bytesL,
                               float *B, size_t bytesB,
                               float *S, size_t bytesS,
                               int m, int n) {
  //__visc__hint(visc::SPIR_TARGET);
  //__visc__hint(visc::GPU_TARGET);
  __visc__attributes(2, L, B, 1, S);

  // Pixel coordinates of this instance (OpenCL: get_global_id(0) / get_global_id(1)).
  void* thisNode = __visc__getNode();
  int gx = __visc__getNodeInstanceID_x(thisNode);
  int gy = __visc__getNodeInstanceID_y(thisNode);

  if ((gx < n) && (gy < m)) {
    const int last = SZB - 1;
    float window[SZB][SZB];

    // Gather the neighbourhood of (gx, gy) for the dilation filter.
    // Cells lying outside the image are padded with MIN_BR.
    for (int r = 0; r < SZB; ++r) {
      const bool rowInside =
          (r == 0) ? (gy > 0) : ((r == last) ? (gy < m - 1) : true);
      for (int c = 0; c < SZB; ++c) {
        const bool colInside =
            (c == 0) ? (gx != 0) : ((c == last) ? (gx != n - 1) : true);
        if (rowInside && colInside)
          window[r][c] = L[(gy + r - 1) * n + (gx + c - 1)];
        else
          window[r][c] = MIN_BR;
      }
    }

    // Dilation: maximum of the window weighted by the structuring element.
    float dilated = MIN_BR;
    for (int r = 0; r < SZB; ++r)
      for (int c = 0; c < SZB; ++c)
        dilated = MAX(dilated, window[r][c] * B[r * SZB + c]);

    // Erosion: same window, but cells on the far side of an image border are
    // padded with MAX_BR instead; then take the weighted minimum.
    float eroded = MAX_BR;
    for (int r = 0; r < SZB; ++r) {
      const bool rowPadded = (r == 0 && gy == 0) || (r == last && gy == m - 1);
      for (int c = 0; c < SZB; ++c) {
        const bool colPadded = (c == 0 && gx == 0) || (c == last && gx == n - 1);
        const float sample = (rowPadded || colPadded) ? MAX_BR : window[r][c];
        eroded = MIN(eroded, sample * B[r * SZB + c]);
      }
    }

    // Output is the difference between the dilated and the eroded pixel.
    float sign = dilated - eroded;
    S[gy * n + gx] = sign;
  }

  // Pass the buffer sizes on to whoever consumes this node's outputs.
  OutStruct output = {bytesB, bytesS};
  return output;
}
// Internal dataflow node wrapping the computeZeroCrossings leaf node.
// Creates a 2D node over computeZeroCrossings with (m, n) as dimension
// arguments and forwards this wrapper's eight arguments to the leaf one-to-one.
// Unlike WrapperlaplacianEstimate, both target hints are commented out here.
// NOTE(review): the bind calls appear to take (node, index on this wrapper,
// index on the child, streaming flag) - confirm against the __visc__bindIn /
// __visc__bindOut documentation.
// NOTE(review): the leaf compares its x instance ID against n and its y
// instance ID against m, while the dimensions are given here as (m, n). If the
// first dimension is the x extent, the whole image is only covered when
// m == n - confirm the argument order of __visc__createNode2D.
// The returned {0} looks like a placeholder; presumably the real outputs are
// the ones routed through __visc__bindOut.
OutStruct WrapperComputeZeroCrossings(float *L, size_t bytesL,
float *B, size_t bytesB,
float *S, size_t bytesS,
int m, int n) {
//__visc__hint(visc::SPIR_TARGET);
//__visc__hint(visc::GPU_TARGET);
__visc__attributes(2, L, B, 1, S);
void* ZCNode = __visc__createNode2D(computeZeroCrossings, m, n);
__visc__bindIn(ZCNode, 0, 0, 0); // Bind L
__visc__bindIn(ZCNode, 1, 1, 0); // Bind bytesL
__visc__bindIn(ZCNode, 2, 2, 0); // Bind B
__visc__bindIn(ZCNode, 3, 3, 0); // Bind bytesB
__visc__bindIn(ZCNode, 4, 4, 0); // Bind S
__visc__bindIn(ZCNode, 5, 5, 0); // Bind bytesS
__visc__bindIn(ZCNode, 6, 6, 0); // Bind m
__visc__bindIn(ZCNode, 7, 7, 0); // Bind n
__visc__bindOut(ZCNode, 0, 0, 0); // Bind output 0 (presumably bytesB, first member the leaf returns)
__visc__bindOut(ZCNode, 1, 1, 0); // Bind output 1 (presumably bytesS, second member the leaf returns)
return {0};
}
//Pipelined Root node
// Builds the two-stage graph: PNode (WrapperlaplacianEstimate) feeding
// CNode (WrapperComputeZeroCrossings). Every binding and edge below passes 1
// as its last argument, where the non-pipelined variant kept in comments
// further down passes 0.
// Argument positions of this function, as used by the bindings:
//   0 I, 1 bytesI, 2 B, 3 bytesB, 4 L, 5 bytesL, 6 S, 7 bytesS, 8 m, 9 n
// Argument positions of both wrappers: 0-5 buffer/size pairs, 6 m, 7 n.
// NOTE(review): the calls appear to be
//   __visc__bindIn(child, index on this node, index on child, streaming flag)
//   __visc__edge(source, destination, source output, destination input, streaming flag)
// - confirm against the VISC/HPVM intrinsic documentation.
// The returned {0} looks like a placeholder value.
OutStruct edgeDetection(float *I, size_t bytesI,
float *B, size_t bytesB,
float *L, size_t bytesL,
float *S, size_t bytesS,
int m, int n) {
__visc__attributes(2, I, B, 2, L, S);
void* PNode = __visc__createNode(WrapperlaplacianEstimate);
void* CNode = __visc__createNode(WrapperComputeZeroCrossings);
// First stage: all of its inputs come straight from this node's arguments.
__visc__bindIn(PNode, 0, 0, 1); // Bind I
__visc__bindIn(PNode, 1, 1, 1); // Bind bytesI
__visc__bindIn(PNode, 2, 2, 1); // Bind B
__visc__bindIn(PNode, 3, 3, 1); // Bind bytesB
__visc__bindIn(PNode, 4, 4, 1); // Bind L
__visc__bindIn(PNode, 5, 5, 1); // Bind bytesL
__visc__bindIn(PNode, 8, 6, 1); // Bind m
__visc__bindIn(PNode, 9, 7, 1); // Bind n
// Second stage: buffers and dimensions come from this node's arguments, but
// bytesL and bytesB arrive over edges from PNode's outputs.
// NOTE(review): presumably these edges are what orders the second stage
// after the first - confirm.
__visc__bindIn(CNode, 4, 0, 1); // Bind L
__visc__edge(PNode, CNode, 1, 1, 1); // Get bytesL
__visc__bindIn(CNode, 2, 2, 1); // Bind B
__visc__edge(PNode, CNode, 0, 3, 1); // Get bytesB
__visc__bindIn(CNode, 6, 4, 1); // Bind S
__visc__bindIn(CNode, 7, 5, 1); // Bind bytesS
__visc__bindIn(CNode, 8, 6, 1); // Bind m
__visc__bindIn(CNode, 9, 7, 1); // Bind n
__visc__bindOut(CNode, 0, 0, 1); // dummy bind output to get pipeline functionality
__visc__bindOut(CNode, 1, 1, 1); // dummy bind output to get pipeline functionality
return {0};
}
// Non-pipelined Root node
//OutStruct edgeDetection(float *I, size_t bytesI,
//float *B, size_t bytesB,
//float *L, size_t bytesL,
//float *S, size_t bytesS,
//int m, int n) {
//__visc__attributes(2, I, B, 2, L, S);
//void* PNode = __visc__createNode(WrapperlaplacianEstimate);
//void* CNode = __visc__createNode(WrapperComputeZeroCrossings);
//__visc__bindIn(PNode, 0, 0, 0); // Bind I
//__visc__bindIn(PNode, 1, 1, 0); // Bind bytesI
//__visc__bindIn(PNode, 2, 2, 0); // Bind B
//__visc__bindIn(PNode, 3, 3, 0); // Bind bytesB
//__visc__bindIn(PNode, 4, 4, 0); // Bind L
//__visc__bindIn(PNode, 5, 5, 0); // Bind bytesL
//__visc__bindIn(PNode, 8, 6, 0); // Bind m
//__visc__bindIn(PNode, 9, 7, 0); // Bind n
//__visc__bindIn(CNode, 4, 0, 0); // Bind L
//__visc__edge(PNode, CNode, 1, 1, 0); // Get bytesL
//__visc__bindIn(CNode, 2, 2, 0); // Bind B
//__visc__edge(PNode, CNode, 0, 3, 0); // Get bytesB
//__visc__bindIn(CNode, 6, 4, 0); // Bind S
//__visc__bindIn(CNode, 7, 5, 0); // Bind bytesS
//__visc__bindIn(CNode, 8, 6, 0); // Bind m
//__visc__bindIn(CNode, 9, 7, 0); // Bind n
//__visc__bindOut(CNode, 0, 0, 0); // dummy bind output to get pipeline functionality
//__visc__bindOut(CNode, 1, 1, 0); // dummy bind output to get pipeline functionality
//return {0};
//}
}
/*
 * Program entry point.
 *
 * Reads the input image from the first input file, runs the edgeDetection
 * dataflow graph on it NUM_RUNS times with NUM_FRAMES frames per run, prints
 * the timers and, when an output file was given, writes the result image S.
 *
 * Exits with -1 when the command line is not usable, when the input matrix
 * has non-positive dimensions, or when the argument bundle cannot be allocated.
 */
int main (int argc, char *argv[]) {
  struct pb_Parameters *params;
  struct pb_TimerSet timers;
  size_t I_sz, L_sz, S_sz;
  // Start from zero so that a reader which fails before storing the
  // dimensions is detected below instead of leaving them indeterminate.
  int matIrow = 0;
  int matIcol = 0;
  std::vector<float> matI;

  /* Read command line. Exactly three input filenames are expected; only the
     first one (the image, in column-major layout) is read below. */
  params = pb_ReadParameters(&argc, argv);
  if (params == NULL) {
    fprintf(stderr, "Failed to read command-line parameters\n");
    exit(-1);
  }
  if ((params->inpFiles[0] == NULL)
      || (params->inpFiles[1] == NULL)
      || (params->inpFiles[2] == NULL)
      || (params->inpFiles[3] != NULL))
  {
    fprintf(stderr, "Expecting three input filenames\n");
    exit(-1);
  }

  /* Read in data */
  // load the input image
  readColMajorMatrixFile(params->inpFiles[0],
                         matIrow, matIcol, matI);

  // The kernels index the image as [gy * n + gx] for every gy < m, gx < n,
  // so the dimensions must be positive and the data must cover m * n values.
  if (matIrow <= 0 || matIcol <= 0) {
    fprintf(stderr, "Invalid or unreadable input matrix: %s\n",
            params->inpFiles[0]);
    exit(-1);
  }
  // Computed in size_t so large images do not overflow int arithmetic.
  const size_t numPixels =
      static_cast<size_t>(matIrow) * static_cast<size_t>(matIcol);
  if (matI.size() < numPixels) {
    // Keep going (best effort) but never let the kernels read past the end.
    fprintf(stderr,
            "Warning: input matrix holds %lu of %lu expected values; padding with zeros\n",
            static_cast<unsigned long>(matI.size()),
            static_cast<unsigned long>(numPixels));
    matI.resize(numPixels, 0.0f);
  }

  pb_InitializeTimerSet(&timers);
  __visc__init();
  //pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE );

  // Buffer sizes in bytes: input image, Laplacian and final output all have
  // the same number of elements.
  I_sz = numPixels*sizeof(float);
  L_sz = I_sz;
  S_sz = I_sz;

  // Host buffers for the intermediate Laplacian and the final result
  std::vector<float> matL(numPixels);
  std::vector<float> matS(numPixels);

  // Structuring element for dilation / erosion
  float B[] = {0, 1, 0, 1, 1, 1, 0, 1, 0};
  size_t bytesB = 9*sizeof(float);

  //pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE );
  pb_SwitchToTimer( &timers, pb_TimerID_NONE );

  // Argument bundle handed to the dataflow graph; released at the end of main.
  struct InStruct* args = (struct InStruct*)malloc (sizeof(InStruct));
  if (args == NULL) {
    fprintf(stderr, "Out of memory allocating graph arguments\n");
    exit(-1);
  }
  packData(args, &matI.front(), I_sz,
           B, bytesB,
           &matL.front(), L_sz,
           &matS.front(), S_sz,
           matIrow, matIcol);

#define NUM_RUNS 1
#define NUM_FRAMES 1

  std::cout << "Executing Pipeline Version\n";
  // Pipelined graph, driven one frame at a time: each frame is pushed and its
  // result popped before the next frame is pushed.
  pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION );
  for(unsigned j=0; j< NUM_RUNS; j++) {
    llvm_visc_track_mem(&matI[0], I_sz);
    llvm_visc_track_mem(B, bytesB);
    llvm_visc_track_mem(&matL[0], L_sz);
    llvm_visc_track_mem(&matS[0], S_sz);

    void* DFG = __visc__launch(1, edgeDetection, (void*)args);
    for(unsigned i=0 ; i < NUM_FRAMES; i++) {
      __visc__push(DFG, args);
      __visc__pop(DFG);
    }
    __visc__wait(DFG);

    llvm_visc_untrack_mem(&matI[0]);
    llvm_visc_untrack_mem(B);
    llvm_visc_untrack_mem(&matL[0]);
    llvm_visc_untrack_mem(&matS[0]);
  }

  //Pipeline
  //pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION );
  //for(unsigned j=0; j< NUM_RUNS; j++) {
  //llvm_visc_track_mem(&matI[0], I_sz);
  //llvm_visc_track_mem(B, bytesB);
  //llvm_visc_track_mem(&matL[0], L_sz);
  //llvm_visc_track_mem(&matS[0], S_sz);
  //void* DFG = __visc__launch(1, edgeDetection, (void*)args);
  //__visc__push(DFG, args);
  //__visc__push(DFG, args);
  //for(unsigned i=0 ; i < NUM_FRAMES-2; i++) {
  //__visc__push(DFG, args);
  //__visc__pop(DFG);
  //}
  //__visc__pop(DFG);
  //__visc__pop(DFG);
  //__visc__wait(DFG);
  //llvm_visc_untrack_mem(&matI[0]);
  //llvm_visc_untrack_mem(B);
  //llvm_visc_untrack_mem(&matL[0]);
  //llvm_visc_untrack_mem(&matS[0]);
  //}
  //llvm_visc_request_mem(&matS[0], S_sz);

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
  // NOTE(review): the message below is still printed although the sequential
  // version that follows is commented out.
  std::cout << "Executing Sequential Version\n";
  // Sequential Execution Time
  //pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
  //for(unsigned j=0; j< NUM_RUNS; j++) {
  //for(unsigned i=0 ; i < NUM_FRAMES; i++) {
  //for(int gx = 0; gx < matIrow; gx++) {
  //for (int gy=0; gy < matIcol; gy++) {
  //SeqlaplacianEstimate(&matI.front(), I_sz,
  //B, bytesB,
  //&matL.front(), L_sz,
  //matIrow, matIcol, gx, gy);
  //SeqcomputeZeroCrossings(&matL.front(), L_sz,
  //B, bytesB,
  //&matS.front(), S_sz,
  //matIrow, matIcol, gx, gy);
  //}
  //}
  //}
  //}

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);
  pb_PrintTimerSet(&timers);
  __visc__cleanup();

  std::cout << "Writing Result\n";
  if (params->outFile) {
    /* Write the result image S to file */
    pb_SwitchToTimer(&timers, pb_TimerID_IO);
    writeColMajorMatrixFile(params->outFile,
                            matIrow, matIcol, matS);
  }

  // NOTE(review): this figure uses a matrix-multiplication operation count and
  // the KERNEL timer, which this function never switches to; it looks like a
  // leftover from another benchmark. Kept so the program output is unchanged.
  double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
  std::cout<< "GFLOPs = " << 2.* matIrow * matIcol * matIcol/GPUtime/1e9 << std::endl;

  // The graph has been waited on and the runtime cleaned up, so nothing
  // refers to the argument bundle any more.
  free(args);
  pb_FreeParameters(params);
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment