diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
index 6fab7e10fe683bb21b08569ca195b9166768daea..3c2edfc9517dd59da496dc9b1fa4a1b2873607db 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
@@ -41,6 +41,7 @@ public:
 
     float* data_ptr = (float*) res->host_data;
     for (unsigned int i = 0; i < res->num_elems; i++){
+      //printf("**diff value = %f ", std::abs(data_ptr[i] - expected_result[i]));
       if (std::abs(data_ptr[i] - expected_result[i]) > epsilon){
 	failed_tests += 1;
 	failed_test_ids.push_back(test_name);
@@ -68,7 +69,11 @@ public:
     printf("-- Total tests :=  %d \n", total_tests);
     printf("-- Tests Passed := %d \n", passed_tests);
     printf("-- Tests Failed := %d \n", failed_tests);
-   
+
+    printf("\n\n Tests that failed : \n\n");
+    for (unsigned int i = 0; i < failed_test_ids.size(); i++){
+      printf("*** Test = %s \n", failed_test_ids[i].c_str());
+    }
   }
   
 };
@@ -122,7 +127,9 @@ void testTensorHgemm(UnitTestResults& unitTestResults){
   void* rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3);
   fillTensorWithOnes(rhs);
   
-  void* output = tensorHalfGemm(lhs, rhs);   
+  void* output = tensorHalfGemm(lhs, rhs);
+  convertToFP32((struct Tensor*) output);
+
   printTensorValues(output);
 
   const float expected_result[15] = {4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20};
@@ -132,7 +139,7 @@ void testTensorHgemm(UnitTestResults& unitTestResults){
 
 
 
-void testTensorSgemm(){
+void testTensorSgemm(UnitTestResults& unitTestResults){
 
   printf("***** TensorSgemm ***** \n\n");
   void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
@@ -149,6 +156,11 @@ void testTensorSgemm(){
   
   void* output = tensorGemmGPU(lhs, rhs);
   printTensorValues(output);
+
+  const float expected_result[15] = {4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20};
+
+  unitTestResults.evalTestResult((Tensor*) output, expected_result, 15, 0.01, "Sgemm");
+
 }
 
 
@@ -344,6 +356,8 @@ void testTensorHalfGroupConv(){
 	                             0, 0,
 				     1, 1,
 				     conv_mode, conv_groups);
+  
+  convertToFP32((struct Tensor*) conv_out);
 
   printTensorValues(conv_out);
 }
@@ -375,6 +389,8 @@ void testTensorHalfPooling(){
   }
 
   void* output = tensorPooling(x, 0, 2, 2, 0, 0, 2, 2);
+  convertToFP32((struct Tensor*) output);
+
   printTensorValues(output);
 }
 
@@ -398,7 +414,8 @@ void testTensorBatchNorm(){
 
   double epsilon = 1;
   // NOTE: result = (X - mean) / sqrt(epsilon + variance)
-  void* output = tensorBatchNorm(x, gamma, beta, mean, variance, 1);  
+  void* output = tensorBatchNorm(x, gamma, beta, mean, variance, 1);
+
   printTensorValues(output);  
 }
 
@@ -424,6 +441,7 @@ void testTensorHalfBatchNorm(){
   double epsilon = 1;
   // NOTE: result = (X - mean) / sqrt(epsilon + variance)
   void* output = tensorBatchNorm(x, gamma, beta, mean, variance, 1);  
+  convertToFP32((struct Tensor*) output);
 
   printTensorValues(output);  
 }
@@ -1272,8 +1290,6 @@ void testNewTensorOps(){
 
 
 
-
-
 int main(){
 
   llvm_hpvm_initTensorRt(0);
@@ -1283,7 +1299,7 @@ int main(){
   
   // Function call per unit test
   testTensorHgemm(unitTestResults);
-  testTensorSgemm();
+  testTensorSgemm(unitTestResults);
 
   /*
   testTensorConv();