diff --git a/llvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll b/llvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
index 89c8da90f8ab740062bd84cdd365baa67311a7a4..1852fc070fc7ecc60dd610469805482b0c0d3201 100644
--- a/llvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
+++ b/llvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll
@@ -1,5 +1,5 @@
-; ModuleID = '/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt//lib/tensor_runtime.bc'
-source_filename = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt//tensor_runtime/include/tensor_signatures.cc"
+; ModuleID = 'lib/tensor_runtime.bc'
+source_filename = "tensor_runtime/include/tensor_signatures.cc"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -44,7 +44,6 @@ entry:
   %ConvLayer = alloca i8*, align 8
   %FCLayer = alloca i8*, align 8
   %ConvLayer2 = alloca i8*, align 8
-  %ConvLayer3 = alloca i8*, align 8
   %FCLayer2 = alloca i8*, align 8
   %AddWrapper = alloca i8*, align 8
   %ReluWrapper = alloca i8*, align 8
@@ -52,6 +51,14 @@ entry:
   %BatchNormWrapper = alloca i8*, align 8
   %PoolingWrapper = alloca i8*, align 8
   %softmaxWrapper = alloca i8*, align 8
+  %tensorFft = alloca i8*, align 8
+  %tensorReduce = alloca i8*, align 8
+  %tensorProjectiveT = alloca i8*, align 8
+  %tensorMap1 = alloca i8*, align 8
+  %tensorMap2 = alloca i8*, align 8
+  %tensorMap3 = alloca i8*, align 8
+  %tensorStencil = alloca i8*, align 8
+  %tensorCosineT = alloca i8*, align 8
   store i8* bitcast (void (i32)* @llvm_hpvm_initTensorRt to i8*), i8** %initRT, align 8
   store i8* bitcast (void ()* @llvm_hpvm_cleanupTensorRt to i8*), i8** %cleanRT, align 8
   store i8* bitcast (void (i32)* @llvm_hpvm_initApproxhpvmRt to i8*), i8** %initApproxRT, align 8
@@ -90,7 +97,6 @@ entry:
   store i8* bitcast (i8* (i8*, float, float, i8*, float, float, i8*, float, float, i32, i32, i32, i32, i32, i32, i32, float, float, i32)* @ConvLayer_PROMISE to i8*), i8** %ConvLayer, align 8
   store i8* bitcast (i8* (i8*, float, float, i8*, float, float, i8*, float, float, i32, float, float, i32)* @FCLayer_PROMISE to i8*), i8** %FCLayer, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float)* @wrapper_ConvLayer to i8*), i8** %ConvLayer2, align 8
-  store i8* bitcast (i8* (i8*, i8*, i8*, i32, i32, i32, i32, i32, i32)* @wrapper_tensorGroupConvolution to i8*), i8** %ConvLayer3, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i32, float, float)* @wrapper_FCLayer to i8*), i8** %FCLayer2, align 8
   store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorAdd to i8*), i8** %AddWrapper, align 8
   store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorRelu to i8*), i8** %ReluWrapper, align 8
@@ -98,6 +104,14 @@ entry:
   store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i8*, i8*, double)* @wrapper_tensorBatchNorm to i8*), i8** %BatchNormWrapper, align 8
   store i8* bitcast (i8* (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)* @wrapper_tensorPooling to i8*), i8** %PoolingWrapper, align 8
   store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorSoftmax to i8*), i8** %softmaxWrapper, align 8
+  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorFft to i8*), i8** %tensorFft, align 8
+  store i8* bitcast (i8* (i8*, i8*, i32, i8*)* @wrapper_tensorReduce to i8*), i8** %tensorReduce, align 8
+  store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorProjectiveT to i8*), i8** %tensorProjectiveT, align 8
+  store i8* bitcast (i8* (i8*, i8*, i8*)* @wrapper_tensorMap1 to i8*), i8** %tensorMap1, align 8
+  store i8* bitcast (i8* (i8*, i8*, i8*, i8*)* @wrapper_tensorMap2 to i8*), i8** %tensorMap2, align 8
+  store i8* bitcast (i8* (i8*, i8*, i8*, i8*, i8*)* @wrapper_tensorMap3 to i8*), i8** %tensorMap3, align 8
+  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorStencil to i8*), i8** %tensorStencil, align 8
+  store i8* bitcast (i8* (i8*, i8*)* @wrapper_tensorCosineT to i8*), i8** %tensorCosineT, align 8
   ret void
 }
 
@@ -177,8 +191,6 @@ declare i8* @FCLayer_PROMISE(i8*, float, float, i8*, float, float, i8*, float, f
 
 declare i8* @wrapper_ConvLayer(i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, float, float) #1
 
-declare i8* @wrapper_tensorGroupConvolution(i8*, i8*, i8*, i32, i32, i32, i32, i32, i32) #1
-
 declare i8* @wrapper_FCLayer(i8*, i8*, i8*, i8*, i32, float, float) #1
 
 declare i8* @wrapper_tensorAdd(i8*, i8*, i8*) #1
@@ -193,6 +205,22 @@ declare i8* @wrapper_tensorPooling(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
 
 declare i8* @wrapper_tensorSoftmax(i8*, i8*) #1
 
+declare i8* @wrapper_tensorFft(i8*, i8*) #1
+
+declare i8* @wrapper_tensorReduce(i8*, i8*, i32, i8*) #1
+
+declare i8* @wrapper_tensorProjectiveT(i8*, i8*, i8*) #1
+
+declare i8* @wrapper_tensorMap1(i8*, i8*, i8*) #1
+
+declare i8* @wrapper_tensorMap2(i8*, i8*, i8*, i8*) #1
+
+declare i8* @wrapper_tensorMap3(i8*, i8*, i8*, i8*, i8*) #1
+
+declare i8* @wrapper_tensorStencil(i8*, i8*) #1
+
+declare i8* @wrapper_tensorCosineT(i8*, i8*) #1
+
 attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_runtime.h
index eae57400bde7776efd91527e58cdf646fde7f15c..163f20f8a7d65bf920dd071b56d9e9ea0a900075 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_runtime.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_runtime.h
@@ -14,9 +14,10 @@ void *tensorMap2(void *f2, void *i1, void *i2);
 void *tensorMap3(void *f3, void *i1, void *i2, void *i3);
 
 // ***                      Wrapper API declaration                      *** //
+extern "C" {
 void *wrapper_tensorFft(const char *hpvm_node_id, void *input);
 void *wrapper_tensorReduce(
-    const char *hpvm_node_id, void *input, size_t axis, void *func);
+    const char *hpvm_node_id, void *input, int axis, void *func);
 void *wrapper_tensorProjectiveT(
     const char *hpvm_node_id, void *input, void *transformation);
 void *wrapper_tensorMap1(const char *hpvm_node_id, void *func, void *input);
@@ -29,5 +30,6 @@ void *wrapper_tensorMap3(
 // Tentative
 void *wrapper_tensorStencil(const char *hpvm_node_id, void *input);
 void *wrapper_tensorCosineT(const char *hpvm_node_id, void *input);
+}
 
 #endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
index e8947881765637d68ca9d95d716c97d486e8380a..61a895d63b9bcddcd18975eb54d6209771d645d0 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
@@ -55,4 +55,12 @@ void dummyFunction(){
   void* PoolingWrapper = (void*) &wrapper_tensorPooling;    
   void* softmaxWrapper = (void*) &wrapper_tensorSoftmax;    
 
+  void* tensorFft = (void *) &wrapper_tensorFft;
+  void* tensorReduce = (void *) &wrapper_tensorReduce;
+  void* tensorProjectiveT = (void *) &wrapper_tensorProjectiveT;
+  void* tensorMap1 = (void *) &wrapper_tensorMap1;
+  void* tensorMap2 = (void *) &wrapper_tensorMap2;
+  void* tensorMap3 = (void *) &wrapper_tensorMap3;
+  void* tensorStencil = (void *) &wrapper_tensorStencil;
+  void* tensorCosineT = (void *) &wrapper_tensorCosineT;
 }
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
index 497d9ec38bfbff86afc21e290c29c589fa12fc7c..a9235405d16d9f61572254ac82040fb2161cabd8 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
@@ -122,7 +122,7 @@ void *wrapper_tensorFft(const char *hpvm_node_id, void *input) {
 }
 
 void *wrapper_tensorReduce(
-    const char *hpvm_node_id, void *input, size_t axis, void *func) {
+    const char *hpvm_node_id, void *input, int axis, void *func) {
   GPUNodeConfiguration *GPUConf =
       (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
   std::vector<std::pair<