diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu
index 78c48d06045fdc62c6ff7681d689f55398f1d05f..ed59139f71043d03591f78c2e5d682e580b6264a 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques2.cu
@@ -222,9 +222,13 @@ void convToGemmPerfRowHalf(__half * const __restrict__ output,
   const int c = tx % (C * H_eff * W_out) / (H_eff * W_out); //output chan number
   const int h = tx % (H_eff * W_out) / W_out; //output height index (row number)
   const int w = tx % W_out; //output width index (col number)
-  int past_start = (h % (x - 1) >= (x - 1 - start));
-  const int inH = (h / (x - 1) * x + h % (x-1) +
-		   past_start) * V_stride - V_pad; //input height index (row number)
+  int h_index; //unstrided, unpadded input row that feeds output row h
+  if(h < start) {
+    h_index = h; //rows before `start` map one-to-one (presumably none are skipped there; verify against caller)
+  } else { //with k = h - start + 1: h_index = ceil(k * x / (x - 1)) - 1 + start
+    h_index = ((h - start + 1) * x) / (x - 1) + (((h - start + 1) * x) % (x - 1) > 0) + start - 1; //round up by exactly 1 when inexact; adding the raw remainder repeats/skips rows once x >= 4
+  }
+  const int inH = h_index * V_stride - V_pad; //input height index (row number)
   const int inW = w * H_stride - H_pad; //input width index (col number)
   if(n < N) { //is thread id within bounds?
     for(int i = 0; i < KH; i++) {
@@ -264,28 +268,26 @@ void approxInterpolateRowHalf(int N,
     int row = ((i % (c * h * w)) % (h * w)) / w;
     int ch = (i % (c * h * w)) / (h * w);
     int n = i / (c * h * w);
-    int past_start = ((row % x) >= (x - 1 - start));
 
-    if(row == h-1)
+    if(row < start) {
+        new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] 
+            = old_data[ch * (b * old_h * w) + n * (old_h * w) + (row * w) + col];
+    } else if(row == h-1) {
       new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] =
-	old_data[ch * (b * old_h * w) + n * (old_h * w) + (old_h - 1) * (w) + col];
-    else if (row == 0)
-      new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] =
-	old_data[ch * (b * old_h * w) + n * (old_h * w) + 0 * (w) + col];
-    else if(row % x == x - 1 - start){
-      int past_startO = ((row - 1) % x) > (x - 1 - start);
-      int oldIdx1 = ch * (b * old_h * w) + n * (old_h * w) +
-	((x-1) * ((row - 1) / x) + (row-1) % x - past_startO) * (w) + col;
-
+	    old_data[ch * (b * old_h * w) + n * (old_h * w) + (old_h - 1) * (w) + col];
+    } else if (row == 0) {
       new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] =
-	__hdiv(__hadd(old_data[oldIdx1], old_data[oldIdx1 + 1 * w]), 2);
+    	old_data[ch * (b * old_h * w) + n * (old_h * w) + 0 * (w) + col];
+    } else if((row - start) % x == 0) {
+        int row_index = row - ((row + 1 - start) / x);
+        int output_index = ch * (b * old_h * w) + n * (old_h * w) + (row_index * w) + col;
+        new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = 
+                __hdiv(__hadd(old_data[output_index], old_data[output_index - w]), 2);
+    } else {
+        int row_index = row - ((row + 1 - start) / x) - ((row + 1 - start) % x > 0);
+         int output_index = ch * (b * old_h * w) + n * (old_h * w) + (row_index * w) + col;
+         new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = old_data[output_index];
     }
-    else
-      new_data[n * (c * h * w) + ch * (h * w) + row * (w) + col] =
-	old_data[ch * (b * old_h * w) + n * (old_h * w) +
-		 ((x-1) * (row / x) + row % x - past_start )  * (w) + col];
-
-
   }
 
 }
@@ -1293,15 +1295,12 @@ void* tensorConvApproxHalf2(void* input_ptr, void* filter_ptr,
   const int KW = filter->dims.dim_sizes[3];
 
   h = (2 * vertical_pad + input->dims.dim_sizes[2] - KH) / vertical_stride + 1;
-  long int h_eff = h - h / row;
-  if(h % row > row - 1 - offset)
-    h_eff = h_eff - 1;
+  int rem_row = (h - offset) % row > 0;
+  int h_eff = h - ((h - offset) / row) - rem_row;
 
   w = (2 * horizontal_pad + input->dims.dim_sizes[3] - KW) / horizontal_stride + 1;
-  long int w_eff = w - w / col;
-  if(w % col > col - 1 - offset)
-    w_eff = w_eff - 1;
-
+  int rem_col = (w - offset) % col > 0;
+  int w_eff = w - ((w - offset) / col) - rem_col;
 
   INFO("input: %d %d %d %d\n", input->dims.dim_sizes[0], input->dims.dim_sizes[1],
        input->dims.dim_sizes[2], input->dims.dim_sizes[3]);