diff --git a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc
index fa702d4841ed642548bf7148734d10c2d9508bb4..2ddc182d84d63c422994af34371a2409bfc0d51a 100644
--- a/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc
+++ b/hpvm/test/parboil/benchmarks/pipeline/src/visc_parallel/main.cc
@@ -72,6 +72,10 @@ std::string output_window = "GPU Pipeline - Edge Mapping";
 
 extern "C" {
 
+struct RetStruct { // Aggregate with a single size_t member, declared inside the extern "C" block. NOTE(review): not referenced in the visible hunks; presumably the dataflow graph's return record — confirm.
+  size_t bytesRet; // NOTE(review): presumably a byte count forwarded by the final node's output binding — confirm against the launch/wait code.
+}; 
+
 struct __attribute__((__packed__)) InStruct {
   float* I ;
   size_t bytesI;
@@ -201,7 +205,7 @@ void gaussianSmoothing(float *I, size_t bytesI,
   
     Is[gloc] = smoothedVal;
   }
-  __visc__return(2, m, n);
+  __visc__return(1, bytesIs);
 }
 
 void WrapperGaussianSmoothing(float *I, size_t bytesI,
@@ -220,8 +224,7 @@ void WrapperGaussianSmoothing(float *I, size_t bytesI,
   __visc__bindIn(GSNode, 6, 6, 0); // Bind m
   __visc__bindIn(GSNode, 7, 7, 0); // Bind n
 
-  __visc__bindOut(GSNode, 0, 0, 0); // bind output m
-  __visc__bindOut(GSNode, 1, 1, 0); // bind output n
+  __visc__bindOut(GSNode, 0, 0, 0); // bind output bytesIs
 }
 
 
@@ -312,7 +315,7 @@ void laplacianEstimate(float *Is, size_t bytesIs,
   //OutStruct output = {bytesB, bytesL};
   //if(gx == m-1 && gy == n-1)
     //std::cout << "Exit laplacian\n";
-  __visc__return(1, m);
+  __visc__return(1, bytesL);
 }
 
 void WrapperlaplacianEstimate(float *Is, size_t bytesIs,
@@ -331,7 +334,7 @@ void WrapperlaplacianEstimate(float *Is, size_t bytesIs,
   __visc__bindIn(LNode, 6, 6, 0); // Bind m
   __visc__bindIn(LNode, 7, 7, 0); // Bind n
 
-  __visc__bindOut(LNode, 0, 0, 0); // bind output m
+  __visc__bindOut(LNode, 0, 0, 0); // bind output bytesL
 
 }
 
@@ -434,7 +437,7 @@ void computeZeroCrossings(float *L, size_t bytesL,
   //OutStruct output = {bytesB, bytesS};
   //if(gx == n-1 && gy == n-1)
     //std::cout << "Exit ZC\n";
-  __visc__return(1, m); 
+  __visc__return(1, bytesS); 
 }
 
 void WrapperComputeZeroCrossings(float *L, size_t bytesL,
@@ -453,7 +456,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL,
   __visc__bindIn(ZCNode, 6, 6, 0); // Bind m
   __visc__bindIn(ZCNode, 7, 7, 0); // Bind n
 
-  __visc__bindOut(ZCNode, 0, 0, 0); // bind output m
+  __visc__bindOut(ZCNode, 0, 0, 0); // bind output bytesS
 
 }
 
@@ -521,7 +524,7 @@ void computeGradient(float *Is, size_t bytesIs,
     G[gloc] = __visc__sqrt(Gx*Gx + Gy*Gy);
     //G[gloc] = Gx*Gx + Gy*Gy;
   }
-  __visc__return(1, n);
+  __visc__return(1, bytesG);
 }
 
 void WrapperComputeGradient(float *Is, size_t bytesIs,
@@ -543,7 +546,7 @@ void WrapperComputeGradient(float *Is, size_t bytesIs,
   __visc__bindIn(CGNode, 8, 8, 0); // Bind m
   __visc__bindIn(CGNode, 9, 9, 0); // Bind n
 
-  __visc__bindOut(CGNode, 0, 0, 0); // bind output m
+  __visc__bindOut(CGNode, 0, 0, 0); // bind output bytesG
 }
 
 /* 
@@ -583,7 +586,7 @@ void computeMaxGradientLeaf(float *G, size_t bytesG,
     *maxG = G[lx];
   }
 
-  __visc__return(1, n);
+  __visc__return(1, bytesMaxG);
 }
 
 /* 
@@ -644,7 +647,7 @@ void computeMaxGradientTB(float *G, size_t bytesG,
   __visc__bindIn(CMGLeafNode, 4, 4, 0); // Bind m
   __visc__bindIn(CMGLeafNode, 5, 5, 0); // Bind n
 
-  __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output m
+  __visc__bindOut(CMGLeafNode, 0, 0, 0); // bind output bytesMaxG
 }
 
 void WrapperComputeMaxGradient(float *G, size_t bytesG,
@@ -662,7 +665,7 @@ void WrapperComputeMaxGradient(float *G, size_t bytesG,
   __visc__bindIn(CMGTBNode, 5, 5, 0); // Bind n
   __visc__bindIn(CMGTBNode, 6, 6, 0); // Bind block_x
 
-  __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output m
+  __visc__bindOut(CMGTBNode, 0, 0, 0); // bind output bytesMaxG
 }
 
 /* Reject the zero crossings where the gradient is below a threshold */
@@ -693,7 +696,7 @@ void rejectZeroCrossings(float *S, size_t bytesS,
   if ((gx < n) && (gy < m)) {
     E[gy*n+gx] = ((S[gy*n+gx] > 0.0) && (G[gy*n+gx] > THETA*mG)) ? 1.0 : 0.0 ;
   }
-  __visc__return(1, m);
+  __visc__return(1, bytesE);
 }
 
 void WrapperRejectZeroCrossings(float *S, size_t bytesS,
@@ -715,7 +718,7 @@ void WrapperRejectZeroCrossings(float *S, size_t bytesS,
   __visc__bindIn(RZCNode, 8, 8 , 0); // Bind m
   __visc__bindIn(RZCNode, 9, 9, 0); // Bind n
 
-  __visc__bindOut(RZCNode, 0, 0, 0); // bind output m
+  __visc__bindOut(RZCNode, 0, 0, 0); // bind output bytesE
 }
 
 
@@ -758,65 +761,57 @@ void edgeDetection(float *I, size_t bytesI, // 0
 
   // Laplacian Inputs
   __visc__bindIn(LNode, 2 , 0, 1); // Bind Is
-  __visc__bindIn(LNode, 3 , 1, 1); // Bind bytesIs
+  __visc__edge(GSNode, LNode, 1, 0, 1, 1); // Get bytesIs
   __visc__bindIn(LNode, 16, 2, 1); // Bind B
   __visc__bindIn(LNode, 17, 3, 1); // Bind bytesB
   __visc__bindIn(LNode, 4 , 4, 1); // Bind L
   __visc__bindIn(LNode, 5 , 5, 1); // Bind bytesL
-//  __visc__bindIn(LNode, 22, 6, 1); // Bind m
-  __visc__edge(GSNode, LNode, 1, 0, 6, 1); // Get m
+  __visc__bindIn(LNode, 22, 6, 1); // Bind m
   __visc__bindIn(LNode, 23, 7, 1); // Bind n
 
   // Compute ZC Inputs
   __visc__bindIn(CZCNode, 4 , 0, 1); // Bind L
-  __visc__bindIn(CZCNode, 5 , 1, 1); // Bind bytesL
+  __visc__edge(LNode, CZCNode, 1, 0, 1, 1); // Get bytesL
   __visc__bindIn(CZCNode, 16, 2, 1); // Bind B
   __visc__bindIn(CZCNode, 17, 3, 1); // Bind bytesB
   __visc__bindIn(CZCNode, 6 , 4, 1); // Bind S
   __visc__bindIn(CZCNode, 7 , 5, 1); // Bind bytesS
-  //__visc__bindIn(CZCNode, 22, 6, 1); // Bind m
-  __visc__edge(LNode, CZCNode, 1, 0, 6, 1); // Get m
+  __visc__bindIn(CZCNode, 22, 6, 1); // Bind m
   __visc__bindIn(CZCNode, 23, 7, 1); // Bind n
 
   // Gradient Inputs
   __visc__bindIn(CGNode, 2 , 0, 1); // Bind Is
-  __visc__bindIn(CGNode, 3 , 1, 1); // Bind bytesIs
+  __visc__edge(GSNode, CGNode, 1, 0, 1, 1); // Get bytesIs
   __visc__bindIn(CGNode, 18, 2, 1); // Bind Sx
   __visc__bindIn(CGNode, 19, 3, 1); // Bind bytesSx
   __visc__bindIn(CGNode, 20, 4, 1); // Bind Sy
   __visc__bindIn(CGNode, 21, 5, 1); // Bind bytesSy
   __visc__bindIn(CGNode, 8 , 6, 1); // Bind G
   __visc__bindIn(CGNode, 9 , 7, 1); // Bind bytesG
- __visc__bindIn(CGNode, 22, 8, 1); // Bind m
-  //__visc__edge(CZCNode, CGNode, 1, 0, 8, 1); // Get m
-  //__visc__bindIn(CGNode, 23, 9, 1); // Bind n
-  __visc__edge(GSNode, CGNode, 1, 1, 9, 1); // Get n
+  __visc__bindIn(CGNode, 22, 8, 1); // Bind m
+  __visc__bindIn(CGNode, 23, 9, 1); // Bind n
 
   // Max Gradient Inputs
   __visc__bindIn(CMGNode, 8 , 0, 1); // Bind G
-  __visc__bindIn(CMGNode, 9 , 1, 1); // Bind bytesG
+  __visc__edge(CGNode, CMGNode, 1, 0, 1, 1); // Get bytesG
   __visc__bindIn(CMGNode, 10, 2, 1); // Bind maxG
   __visc__bindIn(CMGNode, 11, 3, 1); // Bind bytesMaxG
- __visc__bindIn(CMGNode, 22, 4, 1);  // Bind m
-  //__visc__edge(CGNode, CMGNode, 1, 0, 4, 1); // Get m
-  //__visc__bindIn(CMGNode, 23, 5, 1); // Bind n
-  __visc__edge(CGNode, CMGNode, 1, 0, 5, 1); // Get n
+  __visc__bindIn(CMGNode, 22, 4, 1);  // Bind m
+  __visc__bindIn(CMGNode, 23, 5, 1); // Bind n
   __visc__bindIn(CMGNode, 24, 6, 1); // Bind block_x
   __visc__bindIn(CMGNode, 25, 7, 1); // Bind grid_x
 
   // Reject ZC Inputs
   __visc__bindIn(RZCNode, 6 , 0, 1); // Bind S
-  __visc__bindIn(RZCNode, 7 , 1, 1); // Bind bytesS
+  __visc__edge(CZCNode, RZCNode, 1, 0, 1, 1); // Get bytesS
   __visc__bindIn(RZCNode, 8 , 2, 1); // Bind G
   __visc__bindIn(RZCNode, 9 , 3, 1); // Bind bytesG
   __visc__bindIn(RZCNode, 10, 4, 1); // Bind maxG
-  __visc__bindIn(RZCNode, 11, 5, 1); // Bind bytesMaxG
+  __visc__edge(CMGNode, RZCNode, 1, 0, 5, 1); // Get bytesMaxG
   __visc__bindIn(RZCNode, 12, 6, 1); // Bind E
   __visc__bindIn(RZCNode, 13, 7, 1); // Bind bytesE
-  //__visc__bindIn(RZCNode, 22, 8, 1); // Bind m
-  __visc__edge(CZCNode, RZCNode, 1, 0, 8, 1); // Get m
-  //__visc__bindIn(RZCNode, 23, 9, 1); // Bind n
-  __visc__edge(CMGNode, RZCNode, 1, 0, 9, 1); // Get n
+  __visc__bindIn(RZCNode, 22, 8, 1); // Bind m
+  __visc__bindIn(RZCNode, 23, 9, 1); // Bind n
 
   __visc__bindOut(RZCNode, 0, 0, 1); // dummy bind output to get pipeline functionality
 }
@@ -995,7 +990,7 @@ int main (int argc, char *argv[]) {
         //__visc__push(DFG, args);
         //__visc__push(DFG, args);
         for(int i=0; i<NUM_FRAMES; i++) {
-          //std::cout << "Frame " << i << "\n";
+          std::cout << "Frame " << i << "\n";
           args->I = (float*) src.data;
 
           *maxG = 0.0;
@@ -1042,6 +1037,7 @@ int main (int argc, char *argv[]) {
           resize(E, out, Size(HEIGHT, WIDTH));
           imshow(output_window, out);
           imshow(input_window, in);
+          std::cout << "Returned size: " << args << '\n';
           waitKey(1);
           //waitKey(0);
           //std::cout << "Show Is\n";