diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
index 5c48b3b01f2641576e6ac725ae0a81f03d6a5dbb..cf8e651357f4053b09449822ebe1ed936cd69cdd 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
@@ -6,6 +6,7 @@
 #include "tensor_runtime.h"
 #include "utils.h"
 
+#include "tensor_custom_ops_cpu.h"
 
 
 void testTensorGemm(){
@@ -1098,6 +1099,64 @@ void testSampling_1_1(){
 
 
 
+// Exercises tensorArgMax on a 4 x 3 x 1 x 1 float tensor and prints the result.
+// Nothing is asserted; the expected winning index is noted beside each row.
+void testTensorArgMax(){
+
+  enum { kRows = 4, kCols = 3 };
+
+  // One row per batch entry, kCols values per row.
+  static const float samples[kRows][kCols] = {
+    { 1, 7, 3},  // max at index 1
+    { 3, 3, 8},  // max at index 2
+    { 2, 5, 9},  // max at index 2
+    {11, 2, 8},  // max at index 0
+  };
+
+  Tensor* input = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
+                                           kRows, kCols, 1, 1);
+  float* values = (float*) input->host_data;
+
+  // Copy the table into the tensor's host buffer, one row after another.
+  for (int row = 0; row < kRows; row++){
+    for (int col = 0; col < kCols; col++){
+      values[row * kCols + col] = samples[row][col];
+    }
+  }
+
+  // Run the op and dump its output tensor.
+  void* argmax_out = tensorArgMax(input);
+
+  printTensorValues(argmax_out);
+
+}
+
+
+
+void testTensorSelect(){
+
+  // Placeholder: this test has no body yet, so calling it does nothing.
+}
+
+
+void testTensorContract(){
+
+  // Placeholder: this test has no body yet, so calling it does nothing.
+}
+
+
+
+void testNewTensorOps(){
+  // Runs the tests for the newly added tensor ops, one after another in table order.
+  void (*const new_op_tests[])() = {testTensorArgMax, testTensorSelect, testTensorContract};
+  for (unsigned t = 0; t < sizeof(new_op_tests) / sizeof(new_op_tests[0]); t++){
+    new_op_tests[t]();
+  }
+}
+
+
+
+
 
 
 
@@ -1137,21 +1196,22 @@ int main(){
 
   // testPerforation2();
 
-
-  //testSampling();
   
 
-  //testSampling2();
-
 
-  //testSampling3();
-  
+  /********* SAMPLING TESTS ****
 
   testSampling_3_3();
 
   
   testSampling_1_1();
 
+  *************/
+
+
+  testNewTensorOps();
+
+  
 
 
   //testQuantization();
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac5ee1004e9231a532848ead63af1a0c54212716
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h
@@ -0,0 +1,41 @@
+
+
+#include "tensor.h"
+
+
+// Computes, for every sample in the batch, the index of its largest value.
+//
+// The input's host buffer is read as batch_size rows of `channels` contiguous
+// floats (dim_sizes[0] x dim_sizes[1]); ties resolve to the lowest index.
+// NOTE(review): rows are strided by dim_sizes[1] only, so this presumably
+// expects the trailing dimensions to be 1 (as in N x C x 1 x 1) - confirm.
+// NOTE(review): only the output is moved to HOST below; the input's host_data
+// is assumed to already be valid - confirm against callers.
+// NOTE(review): this is a non-inline definition in a header without an include
+// guard; including it from more than one source file would duplicate the symbol.
+//
+// Returns a newly created batch_size x 1 x 1 x 1 tensor whose host buffer holds
+// each winning index stored as a float.
+void* tensorArgMax(Tensor* input_ptr){
+  const float* in_data = (const float*) input_ptr->host_data;
+  const int batch_size = input_ptr->dims.dim_sizes[0];
+  const int channels = input_ptr->dims.dim_sizes[1];
+
+  // NOTE(review): 0, 0 are presumably the float data-type and NCHW layout ids - confirm.
+  Tensor* output = (Tensor*) create4DTensor(0, 0, batch_size, 1, 1, 1);
+  changeTensorPlacement(output, HOST);
+  float* out_data = (float*) output->host_data;
+
+  for(int i = 0; i < batch_size; i++){
+    const float* row = in_data + i * channels;
+    // Element 0 is the initial best, so scanning starts at 1; a row with fewer
+    // than two elements is never dereferenced and yields index 0.
+    int best = 0;
+    for(int j = 1; j < channels; j++){
+      if (row[j] > row[best]){ best = j; }
+    }
+    out_data[i] = (float) best;
+  }
+
+  return output;
+}