Skip to content
Snippets Groups Projects
Commit bfb16782 authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Merge branch 'hpvm9-parboil-pipeline' into 'hpvm-reorg-9'

Hpvm9 parboil pipeline

See merge request llvm/hpvm!7
parents 78174a9b 3f068f63
No related branches found
No related tags found
No related merge requests found
...@@ -74,7 +74,8 @@ void llvm_visc_policy_init() { ...@@ -74,7 +74,8 @@ void llvm_visc_policy_init() {
// policy = new NodePolicy(); // policy = new NodePolicy();
// policy = new IterationPolicy(); // policy = new IterationPolicy();
// policy = new DeviceStatusPolicy(); // policy = new DeviceStatusPolicy();
policy = new InteractivePolicy(); // policy = new InteractivePolicy();
policy = new ConstPolicy(0);
cout << "DONE: Initializing policy object.\n"; cout << "DONE: Initializing policy object.\n";
} }
......
...@@ -4,11 +4,9 @@ LANGUAGE=visc ...@@ -4,11 +4,9 @@ LANGUAGE=visc
SRCDIR_OBJS=io.ll #compute_gold.o SRCDIR_OBJS=io.ll #compute_gold.o
VISC_OBJS=main.visc.ll VISC_OBJS=main.visc.ll
APP_CUDALDFLAGS=-lm -lstdc++ APP_CUDALDFLAGS=-lm -lstdc++
APP_CFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include APP_CFLAGS+=-ffast-math -O3 -I/opt/opencv/include
APP_CXXFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include APP_CXXFLAGS+=-ffast-math -O3 -I/opt/opencv/include
OpenCV_DIR=/shared/opencv
APP_LDFLAGS=`pkg-config opencv --libs` APP_LDFLAGS=`pkg-config opencv --libs`
#APP_LDFLAGS=`pkg-config ${OpenCV_DIR}/lib/pkgconfig/opencv.pc --libs`
#APP_LDFLAGS=-L/usr/local/cuda/lib64 -rdynamic /opt/opencv/lib/libopencv_videostab.so.3.0.0 /opt/opencv/lib/libopencv_videoio.so.3.0.0 /opt/opencv/lib/libopencv_video.so.3.0.0 /opt/opencv/lib/libopencv_superres.so.3.0.0 /opt/opencv/lib/libopencv_stitching.so.3.0.0 /opt/opencv/lib/libopencv_shape.so.3.0.0 /opt/opencv/lib/libopencv_photo.so.3.0.0 /opt/opencv/lib/libopencv_objdetect.so.3.0.0 /opt/opencv/lib/libopencv_ml.so.3.0.0 /opt/opencv/lib/libopencv_imgproc.so.3.0.0 /opt/opencv/lib/libopencv_imgcodecs.so.3.0.0 /opt/opencv/lib/libopencv_highgui.so.3.0.0 /opt/opencv/lib/libopencv_hal.a /opt/opencv/lib/libopencv_flann.so.3.0.0 /opt/opencv/lib/libopencv_features2d.so.3.0.0 /opt/opencv/lib/libopencv_core.so.3.0.0 /opt/opencv/lib/libopencv_calib3d.so.3.0.0 /opt/opencv/lib/libopencv_hal.a -ldl -lm -lpthread -lrt /opt/opencv/share/OpenCV/3rdparty/lib/libippicv.a -Wl,-rpath,/usr/local/cuda/lib64:/opt/opencv/lib #APP_LDFLAGS=-L/usr/local/cuda/lib64 -rdynamic /opt/opencv/lib/libopencv_videostab.so.3.0.0 /opt/opencv/lib/libopencv_videoio.so.3.0.0 /opt/opencv/lib/libopencv_video.so.3.0.0 /opt/opencv/lib/libopencv_superres.so.3.0.0 /opt/opencv/lib/libopencv_stitching.so.3.0.0 /opt/opencv/lib/libopencv_shape.so.3.0.0 /opt/opencv/lib/libopencv_photo.so.3.0.0 /opt/opencv/lib/libopencv_objdetect.so.3.0.0 /opt/opencv/lib/libopencv_ml.so.3.0.0 /opt/opencv/lib/libopencv_imgproc.so.3.0.0 /opt/opencv/lib/libopencv_imgcodecs.so.3.0.0 /opt/opencv/lib/libopencv_highgui.so.3.0.0 /opt/opencv/lib/libopencv_hal.a /opt/opencv/lib/libopencv_flann.so.3.0.0 /opt/opencv/lib/libopencv_features2d.so.3.0.0 /opt/opencv/lib/libopencv_core.so.3.0.0 /opt/opencv/lib/libopencv_calib3d.so.3.0.0 /opt/opencv/lib/libopencv_hal.a -ldl -lm -lpthread -lrt /opt/opencv/share/OpenCV/3rdparty/lib/libippicv.a -Wl,-rpath,/usr/local/cuda/lib64:/opt/opencv/lib
#OpenCV link flags all #OpenCV link flags all
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
*/ */
#include "opencv2/opencv.hpp" #include "opencv2/opencv.hpp"
#include "opencv2/ocl/ocl.hpp" #include "opencv2/core/ocl.hpp"
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -201,7 +201,7 @@ void gaussianSmoothing(float *I, size_t bytesI, ...@@ -201,7 +201,7 @@ void gaussianSmoothing(float *I, size_t bytesI,
Is[gloc] = smoothedVal; Is[gloc] = smoothedVal;
} }
__visc__return(2, m, n); __visc__return(2, bytesIs, bytesIs);
} }
void WrapperGaussianSmoothing(float *I, size_t bytesI, void WrapperGaussianSmoothing(float *I, size_t bytesI,
...@@ -220,8 +220,8 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI, ...@@ -220,8 +220,8 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI,
__visc__bindIn(GSNode, 6, 6, 0); // Bind m __visc__bindIn(GSNode, 6, 6, 0); // Bind m
__visc__bindIn(GSNode, 7, 7, 0); // Bind n __visc__bindIn(GSNode, 7, 7, 0); // Bind n
__visc__bindOut(GSNode, 0, 0, 0); // bind output m __visc__bindOut(GSNode, 0, 0, 0); // bind output bytesIs
__visc__bindOut(GSNode, 1, 1, 0); // bind output n __visc__bindOut(GSNode, 1, 1, 0); // bind output bytesIs
} }
...@@ -243,14 +243,9 @@ void laplacianEstimate(float *Is, size_t bytesIs, ...@@ -243,14 +243,9 @@ void laplacianEstimate(float *Is, size_t bytesIs,
// 3x3 image area // 3x3 image area
float imageArea[SZB*SZB]; float imageArea[SZB*SZB];
//int gx = get_global_id(0);
//int gy = get_global_id(1);
void* thisNode = __visc__getNode(); void* thisNode = __visc__getNode();
long gx = __visc__getNodeInstanceID_x(thisNode); long gx = __visc__getNodeInstanceID_x(thisNode);
long gy = __visc__getNodeInstanceID_y(thisNode); long gy = __visc__getNodeInstanceID_y(thisNode);
//if(gx == 0 && gy == 0)
//std::cout << "Entered laplacian\n";
int i, j; int i, j;
if ((gx < n) && (gy < m)) { if ((gx < n) && (gy < m)) {
...@@ -309,10 +304,7 @@ void laplacianEstimate(float *Is, size_t bytesIs, ...@@ -309,10 +304,7 @@ void laplacianEstimate(float *Is, size_t bytesIs,
float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1 * SZB +1]; float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1 * SZB +1];
L[gy*n+gx] = laplacian; L[gy*n+gx] = laplacian;
} }
//OutStruct output = {bytesB, bytesL}; __visc__return(1, bytesL);
//if(gx == m-1 && gy == n-1)
//std::cout << "Exit laplacian\n";
__visc__return(1, m);
} }
void WrapperlaplacianEstimate(float *Is, size_t bytesIs, void WrapperlaplacianEstimate(float *Is, size_t bytesIs,
...@@ -331,7 +323,7 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs, ...@@ -331,7 +323,7 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs,
__visc__bindIn(LNode, 6, 6, 0); // Bind m __visc__bindIn(LNode, 6, 6, 0); // Bind m
__visc__bindIn(LNode, 7, 7, 0); // Bind n __visc__bindIn(LNode, 7, 7, 0); // Bind n
__visc__bindOut(LNode, 0, 0, 0); // bind output m __visc__bindOut(LNode, 0, 0, 0); // bind output bytesL
} }
...@@ -354,15 +346,11 @@ void computeZeroCrossings(float *L, size_t bytesL, ...@@ -354,15 +346,11 @@ void computeZeroCrossings(float *L, size_t bytesL,
// 3x3 image area // 3x3 image area
float imageArea[SZB][SZB]; float imageArea[SZB][SZB];
//int gx = get_global_id(0);
//int gy = get_global_id(1);
void* thisNode = __visc__getNode(); void* thisNode = __visc__getNode();
long gx = __visc__getNodeInstanceID_x(thisNode); long gx = __visc__getNodeInstanceID_x(thisNode);
long gy = __visc__getNodeInstanceID_y(thisNode); long gy = __visc__getNodeInstanceID_y(thisNode);
int i, j; int i, j;
//if(gx == 0 && gy == 0)
//std::cout << "Entered ZC\n";
if ((gx < n) && (gy < m)) { if ((gx < n) && (gy < m)) {
// Data copy for dilation filter // Data copy for dilation filter
imageArea[1][1] = L[gy * n + gx] > MIN_BR? MAX_BR : MIN_BR; imageArea[1][1] = L[gy * n + gx] > MIN_BR? MAX_BR : MIN_BR;
...@@ -431,10 +419,7 @@ void computeZeroCrossings(float *L, size_t bytesL, ...@@ -431,10 +419,7 @@ void computeZeroCrossings(float *L, size_t bytesL,
float pixelSign = dilatedPixel - erodedPixel; float pixelSign = dilatedPixel - erodedPixel;
S[gy*n+gx] = pixelSign; S[gy*n+gx] = pixelSign;
} }
//OutStruct output = {bytesB, bytesS}; __visc__return(1, bytesS);
//if(gx == n-1 && gy == n-1)
//std::cout << "Exit ZC\n";
__visc__return(1, m);
} }
void WrapperComputeZeroCrossings(float *L, size_t bytesL, void WrapperComputeZeroCrossings(float *L, size_t bytesL,
...@@ -453,7 +438,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL, ...@@ -453,7 +438,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL,
__visc__bindIn(ZCNode, 6, 6, 0); // Bind m __visc__bindIn(ZCNode, 6, 6, 0); // Bind m
__visc__bindIn(ZCNode, 7, 7, 0); // Bind n __visc__bindIn(ZCNode, 7, 7, 0); // Bind n
__visc__bindOut(ZCNode, 0, 0, 0); // bind output m __visc__bindOut(ZCNode, 0, 0, 0); // bind output bytesS
} }
...@@ -519,9 +504,8 @@ void computeGradient(float *Is, size_t bytesIs, ...@@ -519,9 +504,8 @@ void computeGradient(float *Is, size_t bytesIs,
} }
G[gloc] = sqrt(Gx*Gx + Gy*Gy); G[gloc] = sqrt(Gx*Gx + Gy*Gy);
//G[gloc] = Gx*Gx + Gy*Gy;
} }
__visc__return(1, n); __visc__return(1, bytesG);
} }
void WrapperComputeGradient(float *Is, size_t bytesIs, void WrapperComputeGradient(float *Is, size_t bytesIs,
...@@ -543,7 +527,7 @@ void WrapperComputeGradient(float *Is, size_t bytesIs, ...@@ -543,7 +527,7 @@ void WrapperComputeGradient(float *Is, size_t bytesIs,
__visc__bindIn(CGNode, 8, 8, 0); // Bind m __visc__bindIn(CGNode, 8, 8, 0); // Bind m
__visc__bindIn(CGNode, 9, 9, 0); // Bind n __visc__bindIn(CGNode, 9, 9, 0); // Bind n
__visc__bindOut(CGNode, 0, 0, 0); // bind output m __visc__bindOut(CGNode, 0, 0, 0); // bind output bytesG
} }
/* /*
...@@ -558,7 +542,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, ...@@ -558,7 +542,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG,
long m, long n) { long m, long n) {
__visc__hint(visc::CPU_TARGET); __visc__hint(visc::CPU_TARGET);
//__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, G, 1, maxG); __visc__attributes(1, G, 1, maxG);
void* thisNode = __visc__getNode(); void* thisNode = __visc__getNode();
...@@ -566,7 +549,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, ...@@ -566,7 +549,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG,
long lx = __visc__getNodeInstanceID_x(thisNode); // threadIdx.x long lx = __visc__getNodeInstanceID_x(thisNode); // threadIdx.x
long dimx = __visc__getNumNodeInstances_x(thisNode); // blockDim.x long dimx = __visc__getNumNodeInstances_x(thisNode); // blockDim.x
// Assume a single thread block // Assume a single thread block
// Thread block iterates over all elements // Thread block iterates over all elements
for (int i = lx + dimx; i < m*n; i+= dimx) { for (int i = lx + dimx; i < m*n; i+= dimx) {
...@@ -584,57 +566,13 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, ...@@ -584,57 +566,13 @@ void computeMaxGradientLeaf(float *G, size_t bytesG,
*maxG = G[lx]; *maxG = G[lx];
} }
__visc__return(1, n); __visc__return(1, bytesMaxG);
} }
/*
* Reduction
* G : input
* maxG: output
* Each static node processes 2*nodeDim elements
* Need 1D grid, a thread per 2 pixels
*/
//void computeMaxGradientLeaf(float *G, size_t bytesG,
//float *maxG, size_t bytesMaxG,
//int m, int n) {
//__visc__hint(visc::DEVICE);
//TODO: maxG should be initialized to zero (MIN_BR) every time
//__visc__attributes(2, G, maxG, 1, maxG);
//void* thisNode = __visc__getNode();
//void* parentNode = __visc__getParentNode(thisNode);
//int lx = __visc__getNodeInstanceID_x(thisNode);
//int px = __visc__getNodeInstanceID_x(parentNode);
//int dimx = __visc__getNumNodeInstances_x(thisNode);
//int gid = lx + 2*px*dimx;
//for (unsigned stride = dimx; stride > 32; stride >>= 1) {
//if ((gid + stride < m*n) && (lx < stride))
//if (G[gid + stride] > G[gid])
//G[gid] = G[gid + stride];
//__visc__barrier();
//}
//for (unsigned stride = 32; stride >= 1; stride >>= 1) {
//if ((gid + stride < m*n) && (lx < stride))
//if (G[gid + stride] > G[gid])
//G[gid] = G[gid + stride];
//}
//if (lx == 0)
//__visc__atomic_max(maxG,G[gid]);
//__visc__return(1, m);
//}
void computeMaxGradientTB(float *G, size_t bytesG, void computeMaxGradientTB(float *G, size_t bytesG,
float *maxG, size_t bytesMaxG, float *maxG, size_t bytesMaxG,
long m, long n, long m, long n,
long block_x) { long block_x) {
//__visc__hint(visc::DEVICE);
__visc__hint(visc::CPU_TARGET); __visc__hint(visc::CPU_TARGET);
__visc__attributes(2, G, maxG, 1, maxG); __visc__attributes(2, G, maxG, 1, maxG);
void* CMGLeafNode = __visc__createNodeND(1, computeMaxGradientLeaf, block_x); void* CMGLeafNode = __visc__createNodeND(1, computeMaxGradientLeaf, block_x);
...@@ -645,7 +583,7 @@ void computeMaxGradientTB(float *G, size_t bytesG, ...@@ -645,7 +583,7 @@ void computeMaxGradientTB(float *G, size_t bytesG,
__visc__bindIn(CMGLeafNode, 4, 4, 0); // Bind m __visc__bindIn(CMGLeafNode, 4, 4, 0); // Bind m
__visc__bindIn(CMGLeafNode, 5, 5, 0); // Bind n __visc__bindIn(CMGLeafNode, 5, 5, 0); // Bind n
__visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output m __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output bytesMaxG
} }
void WrapperComputeMaxGradient(float *G, size_t bytesG, void WrapperComputeMaxGradient(float *G, size_t bytesG,
...@@ -663,7 +601,7 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG, ...@@ -663,7 +601,7 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG,
__visc__bindIn(CMGTBNode, 5, 5, 0); // Bind n __visc__bindIn(CMGTBNode, 5, 5, 0); // Bind n
__visc__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x __visc__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x
__visc__bindOut(CMGTBNode, 0, 0, 0); // bind output m __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output bytesMaxG
} }
/* Reject the zero crossings where the gradient is below a threshold */ /* Reject the zero crossings where the gradient is below a threshold */
...@@ -682,7 +620,6 @@ void rejectZeroCrossings(float *S, size_t bytesS, ...@@ -682,7 +620,6 @@ void rejectZeroCrossings(float *S, size_t bytesS,
float *E, size_t bytesE, float *E, size_t bytesE,
long m, long n) { long m, long n) {
__visc__hint(visc::DEVICE); __visc__hint(visc::DEVICE);
//__visc__hint(visc::CPU_TARGET);
__visc__attributes(3, S, G, maxG, 1, E); __visc__attributes(3, S, G, maxG, 1, E);
void* thisNode = __visc__getNode(); void* thisNode = __visc__getNode();
...@@ -690,11 +627,10 @@ void rejectZeroCrossings(float *S, size_t bytesS, ...@@ -690,11 +627,10 @@ void rejectZeroCrossings(float *S, size_t bytesS,
int gy = __visc__getNodeInstanceID_y(thisNode); int gy = __visc__getNodeInstanceID_y(thisNode);
float mG = *maxG; float mG = *maxG;
//float mG = 1.39203;
if ((gx < n) && (gy < m)) { if ((gx < n) && (gy < m)) {
E[gy*n+gx] = ((S[gy*n+gx] > 0.0) && (G[gy*n+gx] > THETA*mG)) ? 1.0 : 0.0 ; E[gy*n+gx] = ((S[gy*n+gx] > 0.0) && (G[gy*n+gx] > THETA*mG)) ? 1.0 : 0.0 ;
} }
__visc__return(1, m); __visc__return(1, bytesE);
} }
void WrapperRejectZeroCrossings(float *S, size_t bytesS, void WrapperRejectZeroCrossings(float *S, size_t bytesS,
...@@ -716,7 +652,7 @@ void WrapperRejectZeroCrossings(float *S, size_t bytesS, ...@@ -716,7 +652,7 @@ void WrapperRejectZeroCrossings(float *S, size_t bytesS,
__visc__bindIn(RZCNode, 8, 8 , 0); // Bind m __visc__bindIn(RZCNode, 8, 8 , 0); // Bind m
__visc__bindIn(RZCNode, 9, 9, 0); // Bind n __visc__bindIn(RZCNode, 9, 9, 0); // Bind n
__visc__bindOut(RZCNode, 0, 0, 0); // bind output m __visc__bindOut(RZCNode, 0, 0, 0); // bind output bytesE
} }
...@@ -759,67 +695,59 @@ void edgeDetection(float *I, size_t bytesI, // 0 ...@@ -759,67 +695,59 @@ void edgeDetection(float *I, size_t bytesI, // 0
// Laplacian Inputs // Laplacian Inputs
__visc__bindIn(LNode, 2 , 0, 1); // Bind Is __visc__bindIn(LNode, 2 , 0, 1); // Bind Is
__visc__bindIn(LNode, 3 , 1, 1); // Bind bytesIs __visc__edge(GSNode, LNode, 1, 0, 1, 1); // Get bytesIs
__visc__bindIn(LNode, 16, 2, 1); // Bind B __visc__bindIn(LNode, 16, 2, 1); // Bind B
__visc__bindIn(LNode, 17, 3, 1); // Bind bytesB __visc__bindIn(LNode, 17, 3, 1); // Bind bytesB
__visc__bindIn(LNode, 4 , 4, 1); // Bind L __visc__bindIn(LNode, 4 , 4, 1); // Bind L
__visc__bindIn(LNode, 5 , 5, 1); // Bind bytesL __visc__bindIn(LNode, 5 , 5, 1); // Bind bytesL
// __visc__bindIn(LNode, 22, 6, 1); // Bind m __visc__bindIn(LNode, 22, 6, 1); // Bind m
__visc__edge(GSNode, LNode, 1, 0, 6, 1); // Get m
__visc__bindIn(LNode, 23, 7, 1); // Bind n __visc__bindIn(LNode, 23, 7, 1); // Bind n
// Compute ZC Inputs // Compute ZC Inputs
__visc__bindIn(CZCNode, 4 , 0, 1); // Bind L __visc__bindIn(CZCNode, 4 , 0, 1); // Bind L
__visc__bindIn(CZCNode, 5 , 1, 1); // Bind bytesL __visc__edge(LNode, CZCNode, 1, 0, 1, 1); // Get bytesL
__visc__bindIn(CZCNode, 16, 2, 1); // Bind B __visc__bindIn(CZCNode, 16, 2, 1); // Bind B
__visc__bindIn(CZCNode, 17, 3, 1); // Bind bytesB __visc__bindIn(CZCNode, 17, 3, 1); // Bind bytesB
__visc__bindIn(CZCNode, 6 , 4, 1); // Bind S __visc__bindIn(CZCNode, 6 , 4, 1); // Bind S
__visc__bindIn(CZCNode, 7 , 5, 1); // Bind bytesS __visc__bindIn(CZCNode, 7 , 5, 1); // Bind bytesS
//__visc__bindIn(CZCNode, 22, 6, 1); // Bind m __visc__bindIn(CZCNode, 22, 6, 1); // Bind m
__visc__edge(LNode, CZCNode, 1, 0, 6, 1); // Get m
__visc__bindIn(CZCNode, 23, 7, 1); // Bind n __visc__bindIn(CZCNode, 23, 7, 1); // Bind n
// Gradient Inputs // Gradient Inputs
__visc__bindIn(CGNode, 2 , 0, 1); // Bind Is __visc__bindIn(CGNode, 2 , 0, 1); // Bind Is
__visc__bindIn(CGNode, 3 , 1, 1); // Bind bytesIs __visc__edge(GSNode, CGNode, 1, 1, 1, 1); // Get bytesIs
__visc__bindIn(CGNode, 18, 2, 1); // Bind Sx __visc__bindIn(CGNode, 18, 2, 1); // Bind Sx
__visc__bindIn(CGNode, 19, 3, 1); // Bind bytesSx __visc__bindIn(CGNode, 19, 3, 1); // Bind bytesSx
__visc__bindIn(CGNode, 20, 4, 1); // Bind Sy __visc__bindIn(CGNode, 20, 4, 1); // Bind Sy
__visc__bindIn(CGNode, 21, 5, 1); // Bind bytesSy __visc__bindIn(CGNode, 21, 5, 1); // Bind bytesSy
__visc__bindIn(CGNode, 8 , 6, 1); // Bind G __visc__bindIn(CGNode, 8 , 6, 1); // Bind G
__visc__bindIn(CGNode, 9 , 7, 1); // Bind bytesG __visc__bindIn(CGNode, 9 , 7, 1); // Bind bytesG
__visc__bindIn(CGNode, 22, 8, 1); // Bind m __visc__bindIn(CGNode, 22, 8, 1); // Bind m
//__visc__edge(CZCNode, CGNode, 1, 0, 8, 1); // Get m __visc__bindIn(CGNode, 23, 9, 1); // Bind n
//__visc__bindIn(CGNode, 23, 9, 1); // Bind n
__visc__edge(GSNode, CGNode, 1, 1, 9, 1); // Get n
// Max Gradient Inputs // Max Gradient Inputs
__visc__bindIn(CMGNode, 8 , 0, 1); // Bind G __visc__bindIn(CMGNode, 8 , 0, 1); // Bind G
__visc__bindIn(CMGNode, 9 , 1, 1); // Bind bytesG __visc__edge(CGNode, CMGNode, 1, 0, 1, 1); // Get bytesG
__visc__bindIn(CMGNode, 10, 2, 1); // Bind maxG __visc__bindIn(CMGNode, 10, 2, 1); // Bind maxG
__visc__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG __visc__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG
__visc__bindIn(CMGNode, 22, 4, 1); // Bind m __visc__bindIn(CMGNode, 22, 4, 1); // Bind m
//__visc__edge(CGNode, CMGNode, 1, 0, 4, 1); // Get m __visc__bindIn(CMGNode, 23, 5, 1); // Bind n
//__visc__bindIn(CMGNode, 23, 5, 1); // Bind n
__visc__edge(CGNode, CMGNode, 1, 0, 5, 1); // Get n
__visc__bindIn(CMGNode, 24, 6, 1); // Bind block_x __visc__bindIn(CMGNode, 24, 6, 1); // Bind block_x
__visc__bindIn(CMGNode, 25, 7, 1); // Bind grid_x __visc__bindIn(CMGNode, 25, 7, 1); // Bind grid_x
// Reject ZC Inputs // Reject ZC Inputs
__visc__bindIn(RZCNode, 6 , 0, 1); // Bind S __visc__bindIn(RZCNode, 6 , 0, 1); // Bind S
__visc__bindIn(RZCNode, 7 , 1, 1); // Bind bytesS __visc__edge(CZCNode, RZCNode, 1, 0, 1, 1); // Get bytesS
__visc__bindIn(RZCNode, 8 , 2, 1); // Bind G __visc__bindIn(RZCNode, 8 , 2, 1); // Bind G
__visc__bindIn(RZCNode, 9 , 3, 1); // Bind bytesG __visc__bindIn(RZCNode, 9 , 3, 1); // Bind bytesG
__visc__bindIn(RZCNode, 10, 4, 1); // Bind maxG __visc__bindIn(RZCNode, 10, 4, 1); // Bind maxG
__visc__bindIn(RZCNode, 11, 5, 1); // Bind bytesMaxG __visc__edge(CMGNode, RZCNode, 1, 0, 5, 1); // Get bytesMaxG
__visc__bindIn(RZCNode, 12, 6, 1); // Bind E __visc__bindIn(RZCNode, 12, 6, 1); // Bind E
__visc__bindIn(RZCNode, 13, 7, 1); // Bind bytesE __visc__bindIn(RZCNode, 13, 7, 1); // Bind bytesE
//__visc__bindIn(RZCNode, 22, 8, 1); // Bind m __visc__bindIn(RZCNode, 22, 8, 1); // Bind m
__visc__edge(CZCNode, RZCNode, 1, 0, 8, 1); // Get m __visc__bindIn(RZCNode, 23, 9, 1); // Bind n
//__visc__bindIn(RZCNode, 23, 9, 1); // Bind n
__visc__edge(CMGNode, RZCNode, 1, 0, 9, 1); // Get n
__visc__bindOut(RZCNode, 0, 0, 1); // dummy bind output to get pipeline functionality __visc__bindOut(RZCNode, 0, 0, 1); // Bind output
} }
} }
...@@ -833,18 +761,8 @@ void getNextFrame(VideoCapture& VC, Mat& F) { ...@@ -833,18 +761,8 @@ void getNextFrame(VideoCapture& VC, Mat& F) {
cvtColor( F, F, CV_BGR2GRAY ); cvtColor( F, F, CV_BGR2GRAY );
F.convertTo(F, CV_32F, 1.0/255.0); F.convertTo(F, CV_32F, 1.0/255.0);
} }
//void showInOut(Mat& Input, Mat& Output) {
//Mat in, out;
//resize(Input, in, Size(512, 768));
//resize(Output, out, Size(512, 768));
//imshow(input_window, in);
//imshow(output_window, out);
//}
int main (int argc, char *argv[]) { int main (int argc, char *argv[]) {
struct pb_Parameters *params; struct pb_Parameters *params;
...@@ -877,11 +795,6 @@ int main (int argc, char *argv[]) { ...@@ -877,11 +795,6 @@ int main (int argc, char *argv[]) {
NUM_FRAMES = 600; NUM_FRAMES = 600;
std::cout << "Number of frames = " << NUM_FRAMES << "\n"; std::cout << "Number of frames = " << NUM_FRAMES << "\n";
// Used to store time after each frame computation is completed
//timeval *timeStamps = (timeval *)malloc(NUM_RUNS*NUM_FRAMES*sizeof(timeval));
//struct timeval tv_start;
//unsigned long cnt = 0; // Counter, for the video frames
namedWindow(input_window, CV_WINDOW_AUTOSIZE); namedWindow(input_window, CV_WINDOW_AUTOSIZE);
namedWindow(output_window, CV_WINDOW_AUTOSIZE); namedWindow(output_window, CV_WINDOW_AUTOSIZE);
moveWindow(input_window, POSX_IN, POSY_IN); moveWindow(input_window, POSX_IN, POSY_IN);
...@@ -909,7 +822,6 @@ int main (int argc, char *argv[]) { ...@@ -909,7 +822,6 @@ int main (int argc, char *argv[]) {
pb_InitializeTimerSet(&timers); pb_InitializeTimerSet(&timers);
__visc__init(); __visc__init();
//pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE );
// copy A to device memory // copy A to device memory
I_sz = src.size[0]*src.size[1]*sizeof(float); I_sz = src.size[0]*src.size[1]*sizeof(float);
...@@ -920,8 +832,6 @@ int main (int argc, char *argv[]) { ...@@ -920,8 +832,6 @@ int main (int argc, char *argv[]) {
1, 1, 1, 1, 1, 1,
1, 1, 1 }; 1, 1, 1 };
size_t bytesB = 9*sizeof(float); size_t bytesB = 9*sizeof(float);
//Sx = [-1 0 1 ; -2 0 2 ; -1 0 1 ]
//Sy = [-1 -2 -1 ; 0 0 0 ; 1 2 1 ]
float Sx[] = { -1, 0, 1, float Sx[] = { -1, 0, 1,
-2, 0, 2, -2, 0, 2,
-1, 0, 1 }; -1, 0, 1 };
...@@ -945,10 +855,6 @@ int main (int argc, char *argv[]) { ...@@ -945,10 +855,6 @@ int main (int argc, char *argv[]) {
// grid_x should be equal to the number of SMs on GPU. FTX 680 has 8 SMs // grid_x should be equal to the number of SMs on GPU. FTX 680 has 8 SMs
grid_x = 1; grid_x = 1;
// Copy A and B^T into device memory
//pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE );
//showInOut(src, E);
Mat in, out; Mat in, out;
resize(src, in, Size(HEIGHT, WIDTH)); resize(src, in, Size(HEIGHT, WIDTH));
resize(E, out, Size(HEIGHT, WIDTH)); resize(E, out, Size(HEIGHT, WIDTH));
...@@ -956,7 +862,6 @@ int main (int argc, char *argv[]) { ...@@ -956,7 +862,6 @@ int main (int argc, char *argv[]) {
imshow(output_window, out); imshow(output_window, out);
waitKey(0); waitKey(0);
//NUM_FRAMES = 20;
pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION ); pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION );
struct InStruct* args = (struct InStruct*)malloc (sizeof(InStruct)); struct InStruct* args = (struct InStruct*)malloc (sizeof(InStruct));
packData(args, (float*)src.data, I_sz, packData(args, (float*)src.data, I_sz,
...@@ -976,13 +881,6 @@ int main (int argc, char *argv[]) { ...@@ -976,13 +881,6 @@ int main (int argc, char *argv[]) {
// Check if the total elements is a multiple of block size // Check if the total elements is a multiple of block size
assert(src.size[0]*src.size[1] % block_x == 0); assert(src.size[0]*src.size[1] % block_x == 0);
//imshow(input_window, src);
//imshow(output_window, E);
//waitKey(0);
// Get the time just before computation starts
//gettimeofday(&tv_start,NULL);
for(unsigned j=0; j<NUM_RUNS; j++) { for(unsigned j=0; j<NUM_RUNS; j++) {
std::cout << "Run: " << j << "\n"; std::cout << "Run: " << j << "\n";
void* DFG = __visc__launch(1, edgeDetection, (void*)args); void* DFG = __visc__launch(1, edgeDetection, (void*)args);
...@@ -990,13 +888,9 @@ int main (int argc, char *argv[]) { ...@@ -990,13 +888,9 @@ int main (int argc, char *argv[]) {
cap = VideoCapture(params->inpFiles[0]); cap = VideoCapture(params->inpFiles[0]);
getNextFrame(cap, src); getNextFrame(cap, src);
//packData(args, A.data, BlockSize, &matB[i], BlockSize, &matC[i], BlockSize, BlockElements);
if(NUM_FRAMES >=2) { if(NUM_FRAMES >=2) {
//__visc__push(DFG, args);
//__visc__push(DFG, args);
for(int i=0; i<NUM_FRAMES; i++) { for(int i=0; i<NUM_FRAMES; i++) {
//std::cout << "Frame " << i << "\n"; std::cout << "Frame " << i << "\n";
args->I = (float*) src.data; args->I = (float*) src.data;
*maxG = 0.0; *maxG = 0.0;
...@@ -1014,53 +908,19 @@ int main (int argc, char *argv[]) { ...@@ -1014,53 +908,19 @@ int main (int argc, char *argv[]) {
llvm_visc_track_mem(Sy, bytesSy); llvm_visc_track_mem(Sy, bytesSy);
__visc__push(DFG, args); __visc__push(DFG, args);
__visc__pop(DFG); void *ret = __visc__pop(DFG);
std::cout << "Returned size: " << *(size_t *)ret
<< " expected " << I_sz << '\n';
//llvm_visc_request_mem(E.data, I_sz);
//std::cout << "Show E" << "\n";
//imshow(window_name, E);
//waitKey(0);
//llvm_visc_request_mem(src.data, I_sz);
//llvm_visc_request_mem(Is.data, I_sz);
//llvm_visc_request_mem(L.data, I_sz);
//llvm_visc_request_mem(S.data, I_sz);
//llvm_visc_request_mem(G.data, I_sz);
llvm_visc_request_mem(maxG, bytesMaxG); llvm_visc_request_mem(maxG, bytesMaxG);
llvm_visc_request_mem(E.data, I_sz); llvm_visc_request_mem(E.data, I_sz);
//std::cout << "src.data = " << (float*)src.data << "\n";
//std::cout << "Is.data = " << (float*)Is.data << "\n";
//std::cout << "L.data = " << (float*)L.data << "\n";
//std::cout << "S.data = " << (float*)S.data << "\n";
//std::cout << "G.data = " << (float*)G.data << "\n";
//std::cout << "E.data = " << (float*)E.data << "\n";
//std::cout << "Max G = " << *maxG << "\n";
//gettimeofday(&timeStamps[cnt], NULL);
//cnt++;
Mat in, out; Mat in, out;
resize(src, in, Size(HEIGHT, WIDTH)); resize(src, in, Size(HEIGHT, WIDTH));
//std::cout << "Show E\n";
resize(E, out, Size(HEIGHT, WIDTH)); resize(E, out, Size(HEIGHT, WIDTH));
imshow(output_window, out); imshow(output_window, out);
imshow(input_window, in); imshow(input_window, in);
waitKey(1); waitKey(1);
//waitKey(0);
//std::cout << "Show Is\n";
//resize(Is, out, Size(HEIGHT, WIDTH));
//imshow(output_window, out);
//waitKey(0);
//std::cout << "Show L\n";
//resize(L, out, Size(HEIGHT, WIDTH));
//imshow(output_window, out);
//waitKey(0);
//std::cout << "Show S\n";
//resize(S, out, Size(HEIGHT, WIDTH));
//imshow(output_window, out);
//waitKey(0);
//std::cout << "Show G\n";
//resize(G, out, Size(HEIGHT, WIDTH));
//imshow(output_window, out);
//waitKey(0);
llvm_visc_untrack_mem(src.data); llvm_visc_untrack_mem(src.data);
llvm_visc_untrack_mem(Is.data); llvm_visc_untrack_mem(Is.data);
...@@ -1075,72 +935,17 @@ int main (int argc, char *argv[]) { ...@@ -1075,72 +935,17 @@ int main (int argc, char *argv[]) {
llvm_visc_untrack_mem(Sy); llvm_visc_untrack_mem(Sy);
getNextFrame(cap, src); getNextFrame(cap, src);
} }
//__visc__pop(DFG);
//__visc__pop(DFG);
} }
else { else {
__visc__push(DFG, args); __visc__push(DFG, args);
__visc__pop(DFG); __visc__pop(DFG);
} }
__visc__wait(DFG); __visc__wait(DFG);
} }
pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers); pb_PrintTimerSet(&timers);
//const char *fn = "timestamps.txt";
//std::ofstream outfile;
//outfile.open(fn);
//if (!outfile.is_open()) {
//std::cout << "Failed to open " << fn << " for writing\n";
//}
//for (unsigned long i = 0; i < cnt; i++) {
//double elapsed = (timeStamps[i].tv_sec - tv_start.tv_sec) +
//((timeStamps[i].tv_usec - tv_start.tv_usec)/1000000.0);
//outfile << elapsed << "\n";
//}
//free(timeStamps);
//outfile.close();
__visc__cleanup(); __visc__cleanup();
//if (params->outFile) {
/* Write C to file */
//pb_SwitchToTimer(&timers, pb_TimerID_IO);
//writeColMajorMatrixFile(params->outFile,
//src.size[0], src.size[1], matE);
//}
//std::cout << "Show Is" << "\n";
//Mat output(src.size[0], src.size[1], CV_32F);
//imshow(output_window, Is);
//waitKey(0);
//std::cout << "Show G" << "\n";
//imshow(output_window, L);
//waitKey(0);
//std::cout << "Show L" << "\n";
//imshow(output_window, S);
//waitKey(0);
//std::cout << "Show S" << "\n";
//imshow(output_window, G);
//waitKey(0);
//std::cout << "Show E" << "\n";
//imshow(output_window, E);
//waitKey(0);
//double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL]));
//std::cout<< "GFLOPs = " << 2.* src.size[0] * src.size[1] * src.size[1]/GPUtime/1e9 << std::endl;
pb_FreeParameters(params); pb_FreeParameters(params);
return 0; return 0;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment