diff --git a/hpvm/projects/visc-rt/visc-rt.cpp b/hpvm/projects/visc-rt/visc-rt.cpp index 2f3fc6282ab6f31ae9b0af043df2fcd0a2c06e57..eff618548f3405b668249791738015043a537f17 100644 --- a/hpvm/projects/visc-rt/visc-rt.cpp +++ b/hpvm/projects/visc-rt/visc-rt.cpp @@ -74,7 +74,8 @@ void llvm_visc_policy_init() { // policy = new NodePolicy(); // policy = new IterationPolicy(); // policy = new DeviceStatusPolicy(); - policy = new InteractivePolicy(); + // policy = new InteractivePolicy(); + policy = new ConstPolicy(0); cout << "DONE: Initializing policy object.\n"; } diff --git a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile index ea6b44788d0831221b2d289d1904d35f56240615..ec39b86f1cf71e2e8b6131b076c2953b566cbb56 100644 --- a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile +++ b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/Makefile @@ -4,11 +4,9 @@ LANGUAGE=visc SRCDIR_OBJS=io.ll #compute_gold.o VISC_OBJS=main.visc.ll APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include -APP_CXXFLAGS+=-ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize #-I/shared/opencv/include -OpenCV_DIR=/shared/opencv +APP_CFLAGS+=-ffast-math -O3 -I/opt/opencv/include +APP_CXXFLAGS+=-ffast-math -O3 -I/opt/opencv/include APP_LDFLAGS=`pkg-config opencv --libs` -#APP_LDFLAGS=`pkg-config ${OpenCV_DIR}/lib/pkgconfig/opencv.pc --libs` #APP_LDFLAGS=-L/usr/local/cuda/lib64 -rdynamic /opt/opencv/lib/libopencv_videostab.so.3.0.0 /opt/opencv/lib/libopencv_videoio.so.3.0.0 /opt/opencv/lib/libopencv_video.so.3.0.0 /opt/opencv/lib/libopencv_superres.so.3.0.0 /opt/opencv/lib/libopencv_stitching.so.3.0.0 /opt/opencv/lib/libopencv_shape.so.3.0.0 /opt/opencv/lib/libopencv_photo.so.3.0.0 /opt/opencv/lib/libopencv_objdetect.so.3.0.0 /opt/opencv/lib/libopencv_ml.so.3.0.0 /opt/opencv/lib/libopencv_imgproc.so.3.0.0 /opt/opencv/lib/libopencv_imgcodecs.so.3.0.0 /opt/opencv/lib/libopencv_highgui.so.3.0.0 /opt/opencv/lib/libopencv_hal.a /opt/opencv/lib/libopencv_flann.so.3.0.0 /opt/opencv/lib/libopencv_features2d.so.3.0.0 /opt/opencv/lib/libopencv_core.so.3.0.0 /opt/opencv/lib/libopencv_calib3d.so.3.0.0 /opt/opencv/lib/libopencv_hal.a -ldl -lm -lpthread -lrt /opt/opencv/share/OpenCV/3rdparty/lib/libippicv.a -Wl,-rpath,/usr/local/cuda/lib64:/opt/opencv/lib #OpenCV link flags all diff --git a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc index 892cdabd090412c80f3a2d26ffd3e7c183650ade..b516f860a8c6b8317b11860127b8dc210c66b246 100644 --- a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc +++ b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc @@ -11,7 +11,7 @@ */ #include "opencv2/opencv.hpp" -#include "opencv2/ocl/ocl.hpp" +#include "opencv2/core/ocl.hpp" #include <stdio.h> #include <math.h> #include <stdlib.h> @@ -201,7 +201,7 @@ void gaussianSmoothing(float *I, size_t bytesI, Is[gloc] = smoothedVal; } - __visc__return(2, m, n); + __visc__return(2, bytesIs, bytesIs); } void WrapperGaussianSmoothing(float *I, size_t bytesI, @@ -220,8 +220,8 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI, __visc__bindIn(GSNode, 6, 6, 0); // Bind m __visc__bindIn(GSNode, 7, 7, 0); // Bind n - __visc__bindOut(GSNode, 0, 0, 0); // bind output m - __visc__bindOut(GSNode, 1, 1, 0); // bind output n + __visc__bindOut(GSNode, 0, 0, 0); // bind output bytesIs + __visc__bindOut(GSNode, 1, 1, 0); // bind output bytesIs } @@ -243,14 +243,9 @@ void laplacianEstimate(float *Is, size_t bytesIs, // 3x3 image area float imageArea[SZB*SZB]; - //int gx = get_global_id(0); - //int gy = get_global_id(1); void* thisNode = __visc__getNode(); long gx = __visc__getNodeInstanceID_x(thisNode); long gy = __visc__getNodeInstanceID_y(thisNode); - //if(gx == 0 && gy == 0) - //std::cout << "Entered laplacian\n"; - int i, j; if ((gx < n) && (gy < m)) { @@ -309,10 +304,7 @@ void laplacianEstimate(float *Is, size_t bytesIs, float laplacian = dilatedPixel + erodedPixel - 2 * imageArea[1 * SZB +1]; L[gy*n+gx] = laplacian; } - //OutStruct output = {bytesB, bytesL}; - //if(gx == m-1 && gy == n-1) - //std::cout << "Exit laplacian\n"; - __visc__return(1, m); + __visc__return(1, bytesL); } void WrapperlaplacianEstimate(float *Is, size_t bytesIs, @@ -331,7 +323,7 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs, __visc__bindIn(LNode, 6, 6, 0); // Bind m __visc__bindIn(LNode, 7, 7, 0); // Bind n - __visc__bindOut(LNode, 0, 0, 0); // bind output m + __visc__bindOut(LNode, 0, 0, 0); // bind output bytesL } @@ -354,15 +346,11 @@ void computeZeroCrossings(float *L, size_t bytesL, // 3x3 image area float imageArea[SZB][SZB]; - //int gx = get_global_id(0); - //int gy = get_global_id(1); void* thisNode = __visc__getNode(); long gx = __visc__getNodeInstanceID_x(thisNode); long gy = __visc__getNodeInstanceID_y(thisNode); int i, j; - //if(gx == 0 && gy == 0) - //std::cout << "Entered ZC\n"; if ((gx < n) && (gy < m)) { // Data copy for dilation filter imageArea[1][1] = L[gy * n + gx] > MIN_BR? MAX_BR : MIN_BR; @@ -431,10 +419,7 @@ void computeZeroCrossings(float *L, size_t bytesL, float pixelSign = dilatedPixel - erodedPixel; S[gy*n+gx] = pixelSign; } - //OutStruct output = {bytesB, bytesS}; - //if(gx == n-1 && gy == n-1) - //std::cout << "Exit ZC\n"; - __visc__return(1, m); + __visc__return(1, bytesS); } void WrapperComputeZeroCrossings(float *L, size_t bytesL, @@ -453,7 +438,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL, __visc__bindIn(ZCNode, 6, 6, 0); // Bind m __visc__bindIn(ZCNode, 7, 7, 0); // Bind n - __visc__bindOut(ZCNode, 0, 0, 0); // bind output m + __visc__bindOut(ZCNode, 0, 0, 0); // bind output bytesS } @@ -519,9 +504,8 @@ void computeGradient(float *Is, size_t bytesIs, } G[gloc] = sqrt(Gx*Gx + Gy*Gy); - //G[gloc] = Gx*Gx + Gy*Gy; } - __visc__return(1, n); + __visc__return(1, bytesG); } void WrapperComputeGradient(float *Is, size_t bytesIs, @@ -543,7 +527,7 @@ void WrapperComputeGradient(float *Is, size_t bytesIs, __visc__bindIn(CGNode, 8, 8, 0); // Bind m __visc__bindIn(CGNode, 9, 9, 0); // Bind n - __visc__bindOut(CGNode, 0, 0, 0); // bind output m + __visc__bindOut(CGNode, 0, 0, 0); // bind output bytesG } /* @@ -558,7 +542,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, long m, long n) { __visc__hint(visc::CPU_TARGET); - //__visc__hint(visc::CPU_TARGET); __visc__attributes(1, G, 1, maxG); void* thisNode = __visc__getNode(); @@ -566,7 +549,6 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, long lx = __visc__getNodeInstanceID_x(thisNode); // threadIdx.x long dimx = __visc__getNumNodeInstances_x(thisNode); // blockDim.x - // Assume a single thread block // Thread block iterates over all elements for (int i = lx + dimx; i < m*n; i+= dimx) { @@ -584,57 +566,13 @@ void computeMaxGradientLeaf(float *G, size_t bytesG, *maxG = G[lx]; } - __visc__return(1, n); + __visc__return(1, bytesMaxG); } -/* - * Reduction - * G : input - * maxG: output - * Each static node processes 2*nodeDim elements - * Need 1D grid, a thread per 2 pixels - */ -//void computeMaxGradientLeaf(float *G, size_t bytesG, - //float *maxG, size_t bytesMaxG, - //int m, int n) { - - //__visc__hint(visc::DEVICE); - //TODO: maxG should be initialized to zero (MIN_BR) every time - //__visc__attributes(2, G, maxG, 1, maxG); - - //void* thisNode = __visc__getNode(); - //void* parentNode = __visc__getParentNode(thisNode); - - //int lx = __visc__getNodeInstanceID_x(thisNode); - //int px = __visc__getNodeInstanceID_x(parentNode); - //int dimx = __visc__getNumNodeInstances_x(thisNode); - - //int gid = lx + 2*px*dimx; - - //for (unsigned stride = dimx; stride > 32; stride >>= 1) { - //if ((gid + stride < m*n) && (lx < stride)) - //if (G[gid + stride] > G[gid]) - //G[gid] = G[gid + stride]; - //__visc__barrier(); - //} - - //for (unsigned stride = 32; stride >= 1; stride >>= 1) { - //if ((gid + stride < m*n) && (lx < stride)) - //if (G[gid + stride] > G[gid]) - //G[gid] = G[gid + stride]; - //} - - //if (lx == 0) - //__visc__atomic_max(maxG,G[gid]); - - //__visc__return(1, m); -//} - void computeMaxGradientTB(float *G, size_t bytesG, float *maxG, size_t bytesMaxG, long m, long n, long block_x) { - //__visc__hint(visc::DEVICE); __visc__hint(visc::CPU_TARGET); __visc__attributes(2, G, maxG, 1, maxG); void* CMGLeafNode = __visc__createNodeND(1, computeMaxGradientLeaf, block_x); @@ -645,7 +583,7 @@ void computeMaxGradientTB(float *G, size_t bytesG, __visc__bindIn(CMGLeafNode, 4, 4, 0); // Bind m __visc__bindIn(CMGLeafNode, 5, 5, 0); // Bind n - __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output m + __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output bytesMaxG } void WrapperComputeMaxGradient(float *G, size_t bytesG, @@ -663,7 +601,7 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG, __visc__bindIn(CMGTBNode, 5, 5, 0); // Bind n __visc__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x - __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output m + __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output bytesMaxG } /* Reject the zero crossings where the gradient is below a threshold */ @@ -682,7 +620,6 @@ void rejectZeroCrossings(float *S, size_t bytesS, float *E, size_t bytesE, long m, long n) { __visc__hint(visc::DEVICE); - //__visc__hint(visc::CPU_TARGET); __visc__attributes(3, S, G, maxG, 1, E); void* thisNode = __visc__getNode(); @@ -690,11 +627,10 @@ void rejectZeroCrossings(float *S, size_t bytesS, int gy = __visc__getNodeInstanceID_y(thisNode); float mG = *maxG; - //float mG = 1.39203; if ((gx < n) && (gy < m)) { E[gy*n+gx] = ((S[gy*n+gx] > 0.0) && (G[gy*n+gx] > THETA*mG)) ? 1.0 : 0.0 ; } - __visc__return(1, m); + __visc__return(1, bytesE); } void WrapperRejectZeroCrossings(float *S, size_t bytesS, @@ -716,7 +652,7 @@ void WrapperRejectZeroCrossings(float *S, size_t bytesS, __visc__bindIn(RZCNode, 8, 8 , 0); // Bind m __visc__bindIn(RZCNode, 9, 9, 0); // Bind n - __visc__bindOut(RZCNode, 0, 0, 0); // bind output m + __visc__bindOut(RZCNode, 0, 0, 0); // bind output bytesE } @@ -759,67 +695,59 @@ void edgeDetection(float *I, size_t bytesI, // 0 // Laplacian Inputs __visc__bindIn(LNode, 2 , 0, 1); // Bind Is - __visc__bindIn(LNode, 3 , 1, 1); // Bind bytesIs + __visc__edge(GSNode, LNode, 1, 0, 1, 1); // Get bytesIs __visc__bindIn(LNode, 16, 2, 1); // Bind B __visc__bindIn(LNode, 17, 3, 1); // Bind bytesB __visc__bindIn(LNode, 4 , 4, 1); // Bind L __visc__bindIn(LNode, 5 , 5, 1); // Bind bytesL -// __visc__bindIn(LNode, 22, 6, 1); // Bind m - __visc__edge(GSNode, LNode, 1, 0, 6, 1); // Get m + __visc__bindIn(LNode, 22, 6, 1); // Bind m __visc__bindIn(LNode, 23, 7, 1); // Bind n // Compute ZC Inputs __visc__bindIn(CZCNode, 4 , 0, 1); // Bind L - __visc__bindIn(CZCNode, 5 , 1, 1); // Bind bytesL + __visc__edge(LNode, CZCNode, 1, 0, 1, 1); // Get bytesL __visc__bindIn(CZCNode, 16, 2, 1); // Bind B __visc__bindIn(CZCNode, 17, 3, 1); // Bind bytesB __visc__bindIn(CZCNode, 6 , 4, 1); // Bind S __visc__bindIn(CZCNode, 7 , 5, 1); // Bind bytesS - //__visc__bindIn(CZCNode, 22, 6, 1); // Bind m - __visc__edge(LNode, CZCNode, 1, 0, 6, 1); // Get m + __visc__bindIn(CZCNode, 22, 6, 1); // Bind m __visc__bindIn(CZCNode, 23, 7, 1); // Bind n // Gradient Inputs __visc__bindIn(CGNode, 2 , 0, 1); // Bind Is - __visc__bindIn(CGNode, 3 , 1, 1); // Bind bytesIs + __visc__edge(GSNode, CGNode, 1, 1, 1, 1); // Get bytesIs __visc__bindIn(CGNode, 18, 2, 1); // Bind Sx __visc__bindIn(CGNode, 19, 3, 1); // Bind bytesSx __visc__bindIn(CGNode, 20, 4, 1); // Bind Sy __visc__bindIn(CGNode, 21, 5, 1); // Bind bytesSy __visc__bindIn(CGNode, 8 , 6, 1); // Bind G __visc__bindIn(CGNode, 9 , 7, 1); // Bind bytesG - __visc__bindIn(CGNode, 22, 8, 1); // Bind m - //__visc__edge(CZCNode, CGNode, 1, 0, 8, 1); // Get m - //__visc__bindIn(CGNode, 23, 9, 1); // Bind n - __visc__edge(GSNode, CGNode, 1, 1, 9, 1); // Get n + __visc__bindIn(CGNode, 22, 8, 1); // Bind m + __visc__bindIn(CGNode, 23, 9, 1); // Bind n // Max Gradient Inputs __visc__bindIn(CMGNode, 8 , 0, 1); // Bind G - __visc__bindIn(CMGNode, 9 , 1, 1); // Bind bytesG + __visc__edge(CGNode, CMGNode, 1, 0, 1, 1); // Get bytesG __visc__bindIn(CMGNode, 10, 2, 1); // Bind maxG __visc__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG - __visc__bindIn(CMGNode, 22, 4, 1); // Bind m - //__visc__edge(CGNode, CMGNode, 1, 0, 4, 1); // Get m - //__visc__bindIn(CMGNode, 23, 5, 1); // Bind n - __visc__edge(CGNode, CMGNode, 1, 0, 5, 1); // Get n + __visc__bindIn(CMGNode, 22, 4, 1); // Bind m + __visc__bindIn(CMGNode, 23, 5, 1); // Bind n __visc__bindIn(CMGNode, 24, 6, 1); // Bind block_x __visc__bindIn(CMGNode, 25, 7, 1); // Bind grid_x // Reject ZC Inputs __visc__bindIn(RZCNode, 6 , 0, 1); // Bind S - __visc__bindIn(RZCNode, 7 , 1, 1); // Bind bytesS + __visc__edge(CZCNode, RZCNode, 1, 0, 1, 1); // Get bytesS __visc__bindIn(RZCNode, 8 , 2, 1); // Bind G __visc__bindIn(RZCNode, 9 , 3, 1); // Bind bytesG __visc__bindIn(RZCNode, 10, 4, 1); // Bind maxG - __visc__bindIn(RZCNode, 11, 5, 1); // Bind bytesMaxG + __visc__edge(CMGNode, RZCNode, 1, 0, 5, 1); // Get bytesMaxG __visc__bindIn(RZCNode, 12, 6, 1); // Bind E __visc__bindIn(RZCNode, 13, 7, 1); // Bind bytesE - //__visc__bindIn(RZCNode, 22, 8, 1); // Bind m - __visc__edge(CZCNode, RZCNode, 1, 0, 8, 1); // Get m - //__visc__bindIn(RZCNode, 23, 9, 1); // Bind n - __visc__edge(CMGNode, RZCNode, 1, 0, 9, 1); // Get n + __visc__bindIn(RZCNode, 22, 8, 1); // Bind m + __visc__bindIn(RZCNode, 23, 9, 1); // Bind n - __visc__bindOut(RZCNode, 0, 0, 1); // dummy bind output to get pipeline functionality + __visc__bindOut(RZCNode, 0, 0, 1); // Bind output } } @@ -833,18 +761,8 @@ void getNextFrame(VideoCapture& VC, Mat& F) { cvtColor( F, F, CV_BGR2GRAY ); F.convertTo(F, CV_32F, 1.0/255.0); - } -//void showInOut(Mat& Input, Mat& Output) { - //Mat in, out; - //resize(Input, in, Size(512, 768)); - //resize(Output, out, Size(512, 768)); - //imshow(input_window, in); - //imshow(output_window, out); -//} - - int main (int argc, char *argv[]) { struct pb_Parameters *params; @@ -877,11 +795,6 @@ int main (int argc, char *argv[]) { NUM_FRAMES = 600; std::cout << "Number of frames = " << NUM_FRAMES << "\n"; - // Used to store time after each frame computation is completed - //timeval *timeStamps = (timeval *)malloc(NUM_RUNS*NUM_FRAMES*sizeof(timeval)); - //struct timeval tv_start; - //unsigned long cnt = 0; // Counter, for the video frames - namedWindow(input_window, CV_WINDOW_AUTOSIZE); namedWindow(output_window, CV_WINDOW_AUTOSIZE); moveWindow(input_window, POSX_IN, POSY_IN); @@ -909,7 +822,6 @@ int main (int argc, char *argv[]) { pb_InitializeTimerSet(&timers); __visc__init(); - //pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE ); // copy A to device memory I_sz = src.size[0]*src.size[1]*sizeof(float); @@ -920,8 +832,6 @@ int main (int argc, char *argv[]) { 1, 1, 1, 1, 1, 1 }; size_t bytesB = 9*sizeof(float); - //Sx = [-1 0 1 ; -2 0 2 ; -1 0 1 ] - //Sy = [-1 -2 -1 ; 0 0 0 ; 1 2 1 ] float Sx[] = { -1, 0, 1, -2, 0, 2, -1, 0, 1 }; @@ -945,10 +855,6 @@ int main (int argc, char *argv[]) { // grid_x should be equal to the number of SMs on GPU. FTX 680 has 8 SMs grid_x = 1; - // Copy A and B^T into device memory - //pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE ); - - //showInOut(src, E); Mat in, out; resize(src, in, Size(HEIGHT, WIDTH)); resize(E, out, Size(HEIGHT, WIDTH)); @@ -956,7 +862,6 @@ int main (int argc, char *argv[]) { imshow(output_window, out); waitKey(0); - //NUM_FRAMES = 20; pb_SwitchToTimer( &timers, visc_TimerID_COMPUTATION ); struct InStruct* args = (struct InStruct*)malloc (sizeof(InStruct)); packData(args, (float*)src.data, I_sz, @@ -976,13 +881,6 @@ int main (int argc, char *argv[]) { // Check if the total elements is a multiple of block size assert(src.size[0]*src.size[1] % block_x == 0); - //imshow(input_window, src); - //imshow(output_window, E); - //waitKey(0); - - // Get the time just before computation starts - //gettimeofday(&tv_start,NULL); - for(unsigned j=0; j<NUM_RUNS; j++) { std::cout << "Run: " << j << "\n"; void* DFG = __visc__launch(1, edgeDetection, (void*)args); @@ -990,13 +888,9 @@ int main (int argc, char *argv[]) { cap = VideoCapture(params->inpFiles[0]); getNextFrame(cap, src); - //packData(args, A.data, BlockSize, &matB[i], BlockSize, &matC[i], BlockSize, BlockElements); - if(NUM_FRAMES >=2) { - //__visc__push(DFG, args); - //__visc__push(DFG, args); for(int i=0; i<NUM_FRAMES; i++) { - //std::cout << "Frame " << i << "\n"; + std::cout << "Frame " << i << "\n"; args->I = (float*) src.data; *maxG = 0.0; @@ -1014,53 +908,19 @@ int main (int argc, char *argv[]) { llvm_visc_track_mem(Sy, bytesSy); __visc__push(DFG, args); - __visc__pop(DFG); + void *ret = __visc__pop(DFG); + std::cout << "Returned size: " << *(size_t *)ret + << " expected " << I_sz << '\n'; - //llvm_visc_request_mem(E.data, I_sz); - //std::cout << "Show E" << "\n"; - //imshow(window_name, E); - //waitKey(0); - //llvm_visc_request_mem(src.data, I_sz); - //llvm_visc_request_mem(Is.data, I_sz); - //llvm_visc_request_mem(L.data, I_sz); - //llvm_visc_request_mem(S.data, I_sz); - //llvm_visc_request_mem(G.data, I_sz); llvm_visc_request_mem(maxG, bytesMaxG); llvm_visc_request_mem(E.data, I_sz); - //std::cout << "src.data = " << (float*)src.data << "\n"; - //std::cout << "Is.data = " << (float*)Is.data << "\n"; - //std::cout << "L.data = " << (float*)L.data << "\n"; - //std::cout << "S.data = " << (float*)S.data << "\n"; - //std::cout << "G.data = " << (float*)G.data << "\n"; - //std::cout << "E.data = " << (float*)E.data << "\n"; - //std::cout << "Max G = " << *maxG << "\n"; - - //gettimeofday(&timeStamps[cnt], NULL); - //cnt++; + Mat in, out; resize(src, in, Size(HEIGHT, WIDTH)); - //std::cout << "Show E\n"; resize(E, out, Size(HEIGHT, WIDTH)); imshow(output_window, out); imshow(input_window, in); waitKey(1); - //waitKey(0); - //std::cout << "Show Is\n"; - //resize(Is, out, Size(HEIGHT, WIDTH)); - //imshow(output_window, out); - //waitKey(0); - //std::cout << "Show L\n"; - //resize(L, out, Size(HEIGHT, WIDTH)); - //imshow(output_window, out); - //waitKey(0); - //std::cout << "Show S\n"; - //resize(S, out, Size(HEIGHT, WIDTH)); - //imshow(output_window, out); - //waitKey(0); - //std::cout << "Show G\n"; - //resize(G, out, Size(HEIGHT, WIDTH)); - //imshow(output_window, out); - //waitKey(0); llvm_visc_untrack_mem(src.data); llvm_visc_untrack_mem(Is.data); @@ -1075,72 +935,17 @@ int main (int argc, char *argv[]) { llvm_visc_untrack_mem(Sy); getNextFrame(cap, src); - } - //__visc__pop(DFG); - //__visc__pop(DFG); } else { __visc__push(DFG, args); __visc__pop(DFG); } - - __visc__wait(DFG); } - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - - //const char *fn = "timestamps.txt"; - //std::ofstream outfile; - //outfile.open(fn); - //if (!outfile.is_open()) { - //std::cout << "Failed to open " << fn << " for writing\n"; - //} - //for (unsigned long i = 0; i < cnt; i++) { - //double elapsed = (timeStamps[i].tv_sec - tv_start.tv_sec) + - //((timeStamps[i].tv_usec - tv_start.tv_usec)/1000000.0); - //outfile << elapsed << "\n"; - //} - //free(timeStamps); - //outfile.close(); __visc__cleanup(); - - //if (params->outFile) { - - /* Write C to file */ - //pb_SwitchToTimer(&timers, pb_TimerID_IO); - //writeColMajorMatrixFile(params->outFile, - //src.size[0], src.size[1], matE); - //} - - //std::cout << "Show Is" << "\n"; - //Mat output(src.size[0], src.size[1], CV_32F); - //imshow(output_window, Is); - //waitKey(0); - - //std::cout << "Show G" << "\n"; - //imshow(output_window, L); - //waitKey(0); - - //std::cout << "Show L" << "\n"; - //imshow(output_window, S); - //waitKey(0); - - //std::cout << "Show S" << "\n"; - //imshow(output_window, G); - //waitKey(0); - - //std::cout << "Show E" << "\n"; - //imshow(output_window, E); - //waitKey(0); - - //double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - //std::cout<< "GFLOPs = " << 2.* src.size[0] * src.size[1] * src.size[1]/GPUtime/1e9 << std::endl; pb_FreeParameters(params); - return 0; }