Skip to content
Snippets Groups Projects
Commit 0ad4f77d authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Fixed pipeline benchmark to not specify return types of internal nodes. That...

Fixed pipeline benchmark to not specify return types of internal nodes. That is automatically inferred from bind out intrinsics
parent 25bedf2c
No related branches found
No related tags found
No related merge requests found
......@@ -22,14 +22,15 @@ SRCDIR = src/$(VERSION)
BUILDDIR = build/$(VERSION)_$(PLATFORM)
DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
MATRIX1 = $(DATASET_DIR)/$(TEST)/input/matrix1.txt
MATRIX2 = $(DATASET_DIR)/$(TEST)/input/matrix2.txt
MATRIX2T = $(DATASET_DIR)/$(TEST)/input/matrix2t.txt
REF_OUTPUT = $(DATASET_DIR)/$(TEST)/output/matrix3.txt
IMAGE = $(DATASET_DIR)/$(TEST)/input/edgetest_10.png
VIDEO = $(DATASET_DIR)/$(TEST)/input/360x240_1mb.mp4
#VIDEO = $(DATASET_DIR)/$(TEST)/input/bird.avi
REF_OUTPUT = $(DATASET_DIR)/$(TEST)/output/output.txt
RUNDIR = run/$(VERSION)/$(TEST)
OUTPUT = $(RUNDIR)/matrix3.txt
OUTPUT = $(RUNDIR)/output.txt
ARGS = -i $(MATRIX1),$(MATRIX2),$(MATRIX2T) -o $(OUTPUT)
ARGS = -i $(IMAGE) -o $(OUTPUT)
#ARGS = -i $(VIDEO) -o $(OUTPUT)
TOOL = tools/compare-output
#TOOL=echo
include $(PARBOIL_ROOT)/common/mk/Makefile
......@@ -10,6 +10,8 @@
* Main entry of the edge-detection pipeline benchmark
*/
#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
......@@ -41,12 +43,12 @@ extern char* readFile(const char*);
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
extern "C" {
struct __attribute__((__packed__)) OutStruct {
//struct __attribute__((__packed__)) OutStruct {
//int m;
//int n;
size_t bytesB;
size_t bytesOut;
};
//size_t bytesB;
//size_t bytesOut;
//};
struct __attribute__((__packed__)) InStruct {
float* I ;
......@@ -57,6 +59,10 @@ struct __attribute__((__packed__)) InStruct {
size_t bytesL;
float* S;
size_t bytesS;
float* G;
size_t bytesG;
float* E;
size_t bytesE;
int m;
int n;
};
......@@ -66,6 +72,8 @@ void packData(struct InStruct* args, float* I, size_t bytesI,
float* B, size_t bytesB,
float* L, size_t bytesL,
float* S, size_t bytesS,
float* G, size_t bytesG,
float* E, size_t bytesE,
int m, int n) {
args->I = I;
args->bytesI = bytesI;
......@@ -75,6 +83,10 @@ void packData(struct InStruct* args, float* I, size_t bytesI,
args->bytesL = bytesL;
args->S = S;
args->bytesS = bytesS;
args->G = G;
args->bytesG = bytesG;
args->E = E;
args->bytesE = bytesE;
args->m = m;
args->n = n;
}
......@@ -167,10 +179,10 @@ void laplacianEstimate(float *I, size_t bytesI,
//OutStruct output = {bytesB, bytesL};
//if(gx == m-1 && gy == n-1)
//std::cout << "Exit laplacian\n";
__visc__return(bytesB, bytesL);
__visc__return(m);
}
OutStruct WrapperlaplacianEstimate(float *I, size_t bytesI,
void WrapperlaplacianEstimate(float *I, size_t bytesI,
float *B, size_t bytesB,
float *L, size_t bytesL,
int m, int n) {
......@@ -187,9 +199,7 @@ OutStruct WrapperlaplacianEstimate(float *I, size_t bytesI,
__visc__bindIn(LNode, 7, 7, 0); // Bind n
__visc__bindOut(LNode, 0, 0, 0); // bind output m
__visc__bindOut(LNode, 1, 1, 0); // bind output n
return {0};
}
/* Compute the zero crossings of input image L of size m x n */
......@@ -279,10 +289,12 @@ void computeZeroCrossings(float *L, size_t bytesL,
//OutStruct output = {bytesB, bytesS};
//if(gx == n-1 && gy == n-1)
//std::cout << "Exit ZC\n";
__visc__return(bytesB, bytesS);
// FIXME: Return bytesG instead of m; the RZC node expects bytesG.
// Returning m is just temporary.
__visc__return(m);
}
OutStruct WrapperComputeZeroCrossings(float *L, size_t bytesL,
void WrapperComputeZeroCrossings(float *L, size_t bytesL,
float *B, size_t bytesB,
float *S, size_t bytesS,
int m, int n) {
......@@ -299,80 +311,183 @@ OutStruct WrapperComputeZeroCrossings(float *L, size_t bytesL,
__visc__bindIn(ZCNode, 7, 7, 0); // Bind n
__visc__bindOut(ZCNode, 0, 0, 0); // bind output m
__visc__bindOut(ZCNode, 1, 1, 0); // bind output n
return {0};
}
/*
 * Leaf node: keep only the zero crossings whose gradient is strong enough.
 *
 *   S     : sign of image (input)
 *   G     : gradient of (smoothed) image (input)
 *   E     : output image (edges); every in-range element is set to 1.0 or 0.0
 *   m, n  : extents of the images; elements are addressed as [gy*n + gx]
 *
 *   theta : threshold value          -- not a parameter yet, fixed locally
 *   maxG  : maximum gradient of G    -- not a parameter yet, fixed locally
 *
 * An element of E becomes 1.0 when S is positive and G exceeds theta*maxG
 * at the same position; otherwise it becomes 0.0. The node returns m.
 */
void rejectZeroCrossings(float *S, size_t bytesS,
                         float *G, size_t bytesG,
                         float *E, size_t bytesE,
                         //float theta, float maxG,
                         int m, int n) {
  __visc__hint(visc::DEVICE);
  __visc__attributes(2, S, G, 1, E);

  // Position of this dynamic instance inside the 2D node.
  void* self = __visc__getNode();
  int col = __visc__getNodeInstanceID_x(self);
  int row = __visc__getNodeInstanceID_y(self);

  // Placeholder constants until theta/maxG are passed in as arguments.
  float theta = 1.0;
  float maxG = 10.0;

  // Instances that fall outside the image do nothing.
  if ((col < n) && (row < m)) {
    int idx = row*n + col;
    float cutoff = theta*maxG;
    if ((S[idx] > 0.0) && (G[idx] > cutoff)) {
      E[idx] = 1.0;
    } else {
      E[idx] = 0.0;
    }
  }

  __visc__return(m);
}
/*
 * Wrapper (internal) node for the reject-zero-crossings stage.
 *
 * Creates one 2D rejectZeroCrossings node with dimensions (m, n), forwards
 * each of this function's eight arguments to the leaf argument at the same
 * position, and exposes the leaf's output 0 as this node's output 0.
 *
 * The function is declared void and has no return statement.
 * NOTE(review): the node's output type is presumably derived from the
 * __visc__bindOut call below -- confirm against the VISC front end.
 */
void WrapperRejectZeroCrossings(float *S, size_t bytesS,
float *G, size_t bytesG,
float *E, size_t bytesE,
//float theta, float maxG,
int m, int n) {
// This wrapper is hinted CPU_TARGET; the leaf itself is hinted DEVICE.
__visc__hint(visc::CPU_TARGET);
// Same attribute list as the leaf: (2, S, G) then (1, E).
__visc__attributes(2, S, G, 1, E);
void* RZCNode = __visc__createNode2D(rejectZeroCrossings, m, n);
// Argument i of this wrapper feeds argument i of the leaf.
__visc__bindIn(RZCNode, 0, 0, 0); // Bind S
__visc__bindIn(RZCNode, 1, 1, 0); // Bind bytesS
__visc__bindIn(RZCNode, 2, 2, 0); // Bind G
__visc__bindIn(RZCNode, 3, 3, 0); // Bind bytesG
__visc__bindIn(RZCNode, 4, 4, 0); // Bind E
__visc__bindIn(RZCNode, 5, 5, 0); // Bind bytesE
__visc__bindIn(RZCNode, 6, 6, 0); // Bind m
__visc__bindIn(RZCNode, 7, 7, 0); // Bind n
__visc__bindOut(RZCNode, 0, 0, 0); // bind leaf output 0 (the leaf currently returns m, not bytesG)
//return {0};
}
// Pipelined Root node
//
// Builds the top-level graph: one node per stage wrapper, with the root's
// arguments bound to the wrappers' inputs and m passed from stage to stage
// through edges.
//
// NOTE(review): this span is diff residue. It appears to interleave the old
// two-stage version (OutStruct return type, PNode/CNode, `return {0};`) with
// the new three-stage version (void return type, LNode/CZCNode/RZCNode).
// Confirm which lines belong to the current file before editing the code.
//
// Root argument positions, new version:
//   I=0 bytesI=1 B=2 bytesB=3 L=4 bytesL=5 S=6 bytesS=7
//   G=8 bytesG=9 E=10 bytesE=11 m=12 n=13
// Root argument positions, old version: as above up to bytesS=7, then m=8 n=9.
//
// Call shapes as used here:
//   __visc__bindIn(node, rootArgPos, nodeArgPos, flag)
//   __visc__edge(srcNode, dstNode, srcOutputPos, dstArgPos, flag)
//   __visc__bindOut(node, nodeOutputPos, rootOutputPos, flag)
// NOTE(review): the trailing flag is 1 everywhere in this root and 0 inside
// the wrappers; presumably it marks streaming/pipelined bindings -- confirm.
OutStruct edgeDetection(float *I, size_t bytesI,
void edgeDetection(float *I, size_t bytesI,
float *B, size_t bytesB,
float *L, size_t bytesL,
float *S, size_t bytesS,
float *G, size_t bytesG,
float *E, size_t bytesE,
int m, int n) {
// ---- Two-stage wiring (PNode -> CNode), apparently the pre-commit version ----
__visc__attributes(2, I, B, 2, L, S);
void* PNode = __visc__createNode(WrapperlaplacianEstimate);
void* CNode = __visc__createNode(WrapperComputeZeroCrossings);
__visc__bindIn(PNode, 0, 0, 1); // Bind I
__visc__bindIn(PNode, 1, 1, 1); // Bind bytesI
__visc__bindIn(PNode, 2, 2, 1); // Bind B
__visc__bindIn(PNode, 3, 3, 1); // Bind bytesB
__visc__bindIn(PNode, 4, 4, 1); // Bind L
__visc__bindIn(PNode, 5, 5, 1); // Bind bytesL
__visc__bindIn(PNode, 8, 6, 1); // Bind m
__visc__bindIn(PNode, 9, 7, 1); // Bind n
__visc__bindIn(CNode, 4, 0, 1); // Bind L
//__visc__bindIn(CNode, 5, 1, 1); // Bind bytesL
__visc__edge(PNode, CNode, 1, 1, 1); // Get bytesL
__visc__bindIn(CNode, 2, 2, 1); // Bind B
//__visc__bindIn(CNode, 3, 3, 1); // Bind bytesB
__visc__edge(PNode, CNode, 0, 3, 1); // Get bytesB
__visc__bindIn(CNode, 6, 4, 1); // Bind S
__visc__bindIn(CNode, 7, 5, 1); // Bind bytesS
__visc__bindIn(CNode, 8, 6, 1); // Bind m
__visc__bindIn(CNode, 9, 7, 1); // Bind n
//__visc__edge(PNode, CNode, 0, 6, 1); // pass m from P->C
//__visc__edge(PNode, CNode, 1, 7, 1); // pass n from P->C
__visc__bindOut(CNode, 0, 0, 1); // dummy bind output to get pipeline functionality
__visc__bindOut(CNode, 1, 1, 1); // dummy bind output to get pipeline functionality
return {0};
// ---- Three-stage wiring (LNode -> CZCNode -> RZCNode), apparently the post-commit version ----
__visc__attributes(2, I, B, 4, L, S, G, E);
void* LNode = __visc__createNode(WrapperlaplacianEstimate);
void* CZCNode = __visc__createNode(WrapperComputeZeroCrossings);
void* RZCNode = __visc__createNode(WrapperRejectZeroCrossings);
// Laplacian Inputs
__visc__bindIn(LNode, 0, 0, 1); // Bind I
__visc__bindIn(LNode, 1, 1, 1); // Bind bytesI
__visc__bindIn(LNode, 2, 2, 1); // Bind B
__visc__bindIn(LNode, 3, 3, 1); // Bind bytesB
__visc__bindIn(LNode, 4, 4, 1); // Bind L
__visc__bindIn(LNode, 5, 5, 1); // Bind bytesL
__visc__bindIn(LNode, 12, 6, 1); // Bind m
__visc__bindIn(LNode, 13, 7, 1); // Bind n
// Compute ZC Inputs
__visc__bindIn(CZCNode, 4, 0, 1); // Bind L
__visc__bindIn(CZCNode, 5, 1, 1); // Bind bytesL
__visc__bindIn(CZCNode, 2, 2, 1); // Bind B
__visc__bindIn(CZCNode, 3, 3, 1); // Bind bytesB
__visc__bindIn(CZCNode, 6, 4, 1); // Bind S
__visc__bindIn(CZCNode, 7, 5, 1); // Bind bytesS
//__visc__bindIn(CZCNode, 12, 6, 1); // Bind m
__visc__edge(LNode, CZCNode, 0, 6, 1); // m arrives from LNode output 0 instead of the root
__visc__bindIn(CZCNode, 13, 7, 1); // Bind n
// Reject ZC Inputs
__visc__bindIn(RZCNode, 6, 0, 1); // Bind S
__visc__bindIn(RZCNode, 7, 1, 1); // Bind bytesS
__visc__bindIn(RZCNode, 8, 2, 1); // Bind G
__visc__bindIn(RZCNode, 9, 3, 1); // Bind bytesG
__visc__bindIn(RZCNode, 10, 4, 1); // Bind E
__visc__bindIn(RZCNode, 11, 5, 1); // Bind bytesE
//__visc__bindIn(RZCNode, 12, 6, 1); // Bind m
__visc__edge(CZCNode, RZCNode, 0, 6, 1); // m arrives from CZCNode output 0 instead of the root
__visc__bindIn(RZCNode, 13, 7, 1); // Bind n
__visc__bindOut(RZCNode, 0, 0, 1); // dummy bind output to get pipeline functionality
}
}
#define NUM_RUNS 5
#define NUM_RUNS 1
#define NUM_FRAMES 20
using namespace cv;
int main (int argc, char *argv[]) {
struct pb_Parameters *params;
struct pb_TimerSet timers;
size_t I_sz, L_sz, S_sz;
size_t I_sz, L_sz, S_sz, G_sz, E_sz;
int matIrow, matIcol;
std::vector<float> matI;
std::string window_name = "Edge Map";
std::cout << "Using OpenCV" << CV_VERSION << "\n";
/* Read command line. Expect exactly 1 input: the path of the
image file to process */
params = pb_ReadParameters(&argc, argv);
if ((params->inpFiles[0] == NULL)
|| (params->inpFiles[1] == NULL)
|| (params->inpFiles[2] == NULL)
|| (params->inpFiles[3] != NULL))
|| (params->inpFiles[1] != NULL))
{
fprintf(stderr, "Expecting three input filenames\n");
fprintf(stderr, "Expecting input image filename\n");
exit(-1);
}
/* Read in data */
// load A
readColMajorMatrixFile(params->inpFiles[0],
matIrow, matIcol, matI);
//std::cout << "Reading video file: " << params->inpFiles[0] << "\n";
//VideoCapture cap(params->inpFiles[0]);
//if(!cap.isOpened()) {
//std::cout << "Could not open video file" << "\n";
//return -1;
//}
//double count = cap.get(CV_CAP_PROP_FRAME_COUNT);
//namedWindow(window_name, CV_WINDOW_AUTOSIZE);
//Mat src;
//while(1) {
//bool success = cap.read(src);
//if(!success) {
//std::cout << "Cannot read frame" << "\n";
//return -1;
//}
//imshow(window_name, src);
//if(waitKey(0) == 27) break;
//}
Mat src = imread(params->inpFiles[0]);
if( !src.data )
{
return -1;
}
/// Create a window
namedWindow( window_name, CV_WINDOW_AUTOSIZE );
imshow(window_name, src);
waitKey(0);
std::cout << "Image dimension = " << src.size() << "\n";
/// Convert the image to grayscale
cvtColor( src, src, CV_BGR2GRAY );
src.convertTo(src, CV_32F, 1.0/255.0);
if(src.isContinuous()) {
matI.assign((float*)src.datastart, (float*)src.dataend);
std::cout << "Assigning image to float vector\n";
std::cout << "Vector size = " << matI.size() << "\n";
matIrow = src.size[0];
matIcol = src.size[1];
}
else {
std::cout << "Expecting contiguous storage of image in memory!\n";
exit(-1);
}
pb_InitializeTimerSet(&timers);
__visc__init();
......@@ -384,10 +499,14 @@ int main (int argc, char *argv[]) {
// allocate space for C
S_sz = I_sz;
G_sz = I_sz;
E_sz = I_sz;
// OpenCL memory allocation
std::vector<float> matL(matIrow*matIcol);
std::vector<float> matS(matIrow*matIcol);
std::vector<float> matG(matIrow*matIcol);
std::vector<float> matE(matIrow*matIcol);
float B[] = {0, 1, 0, 1, 1, 1, 0, 1, 0};
size_t bytesB = 9*sizeof(float);
......@@ -402,6 +521,8 @@ int main (int argc, char *argv[]) {
for(size_t i=0; i<matL.size(); i++) {
matL[i] = 0.0f;
matS[i] = 0.0f;
matG[i] = 0.0f;
matE[i] = 0.0f;
}
pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION );
......@@ -411,6 +532,8 @@ int main (int argc, char *argv[]) {
B, bytesB,
&matL.front(), L_sz,
&matS.front(), S_sz,
&matG.front(), G_sz,
&matE.front(), E_sz,
matIrow, matIcol);
assert(I_sz % BlockSize == 0);
......@@ -421,6 +544,8 @@ int main (int argc, char *argv[]) {
llvm_visc_track_mem(&matI[0], I_sz);
llvm_visc_track_mem(&matL[0], L_sz);
llvm_visc_track_mem(&matS[0], S_sz);
llvm_visc_track_mem(&matG[0], G_sz);
llvm_visc_track_mem(&matE[0], E_sz);
llvm_visc_track_mem(B, bytesB);
//packData(args, &matA[0], BlockSize, &matB[i], BlockSize, &matC[i], BlockSize, BlockElements);
__visc__push(DFG, args);
......@@ -435,6 +560,8 @@ int main (int argc, char *argv[]) {
llvm_visc_untrack_mem(&matI[0]);
llvm_visc_untrack_mem(&matL[0]);
llvm_visc_untrack_mem(&matS[0]);
llvm_visc_untrack_mem(&matG[0]);
llvm_visc_untrack_mem(&matE[0]);
llvm_visc_untrack_mem(B);
//for(unsigned i=0 ; i < I_sz/sizeof(float); i = i + 16) {
......@@ -472,6 +599,10 @@ int main (int argc, char *argv[]) {
writeColMajorMatrixFile(params->outFile,
matIrow, matIcol, matS);
}
Mat output(src.size[0], src.size[1], CV_32F);
memcpy(output.data, matS.data(), matS.size()*sizeof(float));
imshow(window_name, output);
waitKey(0);
double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
std::cout<< "GFLOPs = " << 2.* matIrow * matIcol * matIcol/GPUtime/1e9 << std::endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment