diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
index b17285b5e05f6a8576c1e3991ad9f4ffa8735d9e..77ef5a396be425816ad093afdb13efe8f42cbd75 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
@@ -430,6 +430,8 @@ NodeConfiguration *RuntimeController::getNodeConfiguration(const char *data) {
 }
 
 void RuntimeController::init(const char *Cstr) {
+   INFO("INIT RUNTIME CONTROLLER ==================\n");
+   printf("INIT RUNTIME CONTROLLER ==================\n");
   // We initialize the path to the profile info output file,
   // based on the path given for the configuration file
   setProfileInfoFilename(Cstr);
@@ -855,7 +857,7 @@ void RuntimeController::readConfigurationFile(const char *str) {
       firstTensorID += NodeConf->getApproxChoices().size();
 
     } else if (tokens[1] == "cpu") {
-      DEBUG("Found gpu configuration\n");
+      INFO("---------Found cpu configuration\n");
 
       // There must be at least one operation, with an approximation option
       CUSTOM_ASSERT((tokens.size() >= 5) &&
@@ -865,6 +867,10 @@ void RuntimeController::readConfigurationFile(const char *str) {
       InitialConfigurations.back().setup.insert(
           std::make_pair(tokens[0], NodeConf));
 
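+      // Register this CPU node configuration in the id -> configuration map so
+      // that lookups keyed by tensor id also resolve for CPU nodes.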
+      InitialConfigurations.back().idConfigMap.insert(
+          std::make_pair(firstTensorID, NodeConf));
+      DEBUG("firstTensorID = %d\n", firstTensorID);
       unsigned idx = 2;
       while (idx < tokens.size()) {
         if (tokens[idx] == "add") {
@@ -952,7 +958,7 @@ void RuntimeController::readConfigurationFile(const char *str) {
         }
         // TODO: other approximation options handled here
       }
-
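+      // Advance the running tensor id past this node's operations so the next
+      // node's configuration is keyed at the correct offset.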
+      firstTensorID += NodeConf->getApproxChoices().size();
     } else {
       DEBUG("Invalid Configuration File\n");
       exit(1);
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc
index 8f0b2dba1147b6a674d760a22112f03f962bf89c..d3a037ea152c279c1528d8a4ec24d32f28df64f9 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc
@@ -154,6 +154,7 @@ void *tensorRegularConvolutionCPU(void *input_ptr, void *filter_ptr,
                             output_width * batch_size;
   float *host_data = (float *)malloc(conv_data_size);
   printf("host data: %p\n", host_data);
+  printf("conv_data_size: %d\n", conv_data_size);
   printf("number of batches: %d\n", batch_size);
   omp_set_num_threads(4);
 #pragma omp parallel for
@@ -1066,7 +1067,7 @@ void *tensorSoftmaxCPU(void *input_ptr) {
   float *logits = (float *)input->host_data;
   int n = input->dims.dim_sizes[0];
   int c = input->dims.dim_sizes[1];
-
+  
   omp_set_num_threads(4);
 #pragma omp parallel for
   for (int i = 0; i < n; i++) {
@@ -1085,7 +1086,6 @@ void *tensorSoftmaxCPU(void *input_ptr) {
       logits[j] /= x;
     }
   }
-
   return input;
 }
 
@@ -1095,10 +1095,14 @@ void *tensorBatchNormCPU(void *input_ptr, void *gamma_ptr, void *beta_ptr,
   Tensor *input = (Tensor *)input_ptr;
   Tensor *gamma = (Tensor *)gamma_ptr;
   Tensor *beta = (Tensor *)beta_ptr;
+  Tensor *mean = (Tensor *)mean_ptr;
+  Tensor *variance = (Tensor *)variance_ptr;
 
   float *__restrict__ host_image = (float *)input->host_data;
   float *__restrict__ host_beta = (float *)beta->host_data;
   float *__restrict__ host_gamma = (float *)gamma->host_data;
+  float *__restrict__ host_mean = (float *)mean->host_data;
+  float *__restrict__ host_variance = (float *)variance->host_data;
 
   int batch_size = input->dims.dim_sizes[0];
   int channels = input->dims.dim_sizes[1];
@@ -1110,32 +1114,19 @@ void *tensorBatchNormCPU(void *input_ptr, void *gamma_ptr, void *beta_ptr,
 #pragma omp parallel for
   for (int b = 0; b < batch_size; b++) {
     for (int ch = 0; ch < channels; ch++) {
-      float mean = 0;
-#pragma omp simd reduction(+ : mean)
-      for (int i = 0; i < image_dim; i++) {
-        int index = b * channels * image_dim + ch * image_dim + i;
-        mean += host_image[index];
-      }
-      mean = mean / channels;
-
-      float variance = 0;
-#pragma omp simd reduction(+ : variance)
-      for (int i = 0; i < image_dim; i++) {
-        int index = b * channels * image_dim + ch * image_dim + i;
-        float tmp = host_image[index] - mean;
-        variance += (tmp * tmp);
-      }
-      variance = variance / channels;
-
-#pragma omp simd
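+      // Use the stored per-channel running mean and variance (inference-style
+      // batch normalization) instead of recomputing statistics over each batch.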
+      float mean = host_mean[ch];
+      float sq_ep_var = sqrt(epsilon + host_variance[ch]);
+      float gamma = host_gamma[ch];
+      float beta = host_beta[ch];
+#pragma omp simd
       for (int i = 0; i < image_dim; i++) {
         int index = b * channels * image_dim + ch * image_dim + i;
         host_image[index] =
-            host_beta[ch] + (host_gamma[ch] * ((host_image[index] - mean) /
-                                               sqrt(epsilon + variance)));
+            beta + (gamma * ((host_image[index] - mean) / sq_ep_var));
       }
     }
   }
+
   return input;
 }
 
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
index 8c77234e2432bd5fe1cde144b031d42273140d42..b9f52c0c8dddb8e7a4aa37abec5ea0d9dfa7164b 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
@@ -39,10 +39,43 @@
 #include "approxhpvm_runtime_utils.h"
 #include "approx_api.h"
 
+#include "tensor_runtime.h"
+#include "tensor_cpu_runtime.h"
+
 extern "C" {
 
 /**** Wrapper Runtime API ***/
 
+// Initialization and clean routines for various supported devices
+void llvm_libtensorhpvm_init(int gpuid) {
+  llvm_hpvm_initApproxhpvmRt(gpuid);
+  llvm_hpvm_initTensorRtCPU();
+}
+
+void llvm_libtensorhpvm_cleanup() {
+  llvm_hpvm_cleanupApproxhpvmRt();
+  llvm_hpvm_cleanupTensorRtCPU();
+}
+
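+// Request the given tensor on whichever device (GPU or CPU) this node is
+// mapped to by its runtime configuration.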
+void llvm_libtensorhpvm_request_tensor(const char *hpvm_node_id, void *tensor) {
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+  if (NodeConf->isGPUNodeConfiguration()) {
+    DEBUG("GPU Configuration detected at node %s: requesting tensor\n", hpvm_node_id);
+    hpvm_request_tensor(tensor, 1); // 1 for GPU
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+    DEBUG("CPU Configuration detected at node %s: requesting tensor\n", hpvm_node_id);
+    hpvm_request_tensor(tensor, 0); // 0 for CPU
+  } else {
+    ERROR("Currently unsupported configuration\n");
+    abort();
+  }
+}
+
 void *
 wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter,
                   void *bias, int conv_pad_h, int conv_pad_w, int conv_stride_h,
@@ -164,9 +197,129 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter,
       pool_out = activation_out;
     }
     return pool_out;
-  } else {
-    ERROR("Unsupported Configuration");
-    abort();
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+    DEBUG("CPU Configuration for ConvLayer\n");
+    // Mapped to CPU - get a CPU node configuration
+    CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      			std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					       int> > > > &ApproxChoices = CPUConf->getApproxChoices();
+      
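+      // The CPU path mirrors the GPU path above: convolution, optional bias add,
+      // optional activation, optional pooling, each via the *_CPU handlers.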
+      // Check for convolution as first operation
+      CUSTOM_ASSERT((ApproxChoices.size() >= 1) &&
+      	      (ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::CONV) &&
+      	      "Incorrect number/type of operations in provided Conv layer configuration");
+      
+      void* conv_out = handleTensorConvApproximationTuples_CPU(ApproxChoices[0].second,
+      						     input, filter, conv_pad_h, conv_pad_w,
+      						     conv_stride_h, conv_stride_w);
+      void* add_out;
+    	if (bias != NULL) {
+    	  // Check for add as second operation
+    	  CUSTOM_ASSERT((ApproxChoices.size() >= 2) &&
+    			(ApproxChoices[1].first == CPUNodeConfiguration::TENSOR_OP::ADD) &&
+    			"Incorrect number/type of operations in provided Conv layer configuration");
+    	  add_out = handleTensorAddApproximationTuples_CPU(ApproxChoices[1].second,
+    						       conv_out, bias);
+    	} else {
+    	  add_out = conv_out;
+    	}
+    
+    	void* activation_out;
+    	switch (activation_id) {
+    	case -1:
+    	  { // No activation
+    	    INFO("No activation Function\n");
+    	    activation_out = add_out;
+    	  }
+    	  break;
+    	case 0:
+    	  { // TanH activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::TANH) &&
+    			  "Incorrect number/type of operations in provided Conv layer configuration");
+    	    activation_out = handleTensorTanhApproximationTuples_CPU(ApproxChoices[2].second,
+    								 add_out);
+    	  }
+    	  break;
+    	case 1:
+    	  { // ReLU activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::RELU) &&
+    			  "Incorrect number/type of operations in provided Conv layer configuration");
+    	    activation_out = handleTensorReluApproximationTuples_CPU(ApproxChoices[2].second,
+    								 add_out);
+    	  }
+    	  break;
+    	case 2:
+    	  { // Clipped ReLU activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) &&
+    			  "Incorrect number/type of operations in provided Conv layer configuration");
+    	    activation_out =
+    	      handleTensorClippedReluApproximationTuples_CPU(ApproxChoices[2].second,
+    							 add_out, out_min, out_max);
+    	  }
+    	  break;
+    	default:
+    	  {
+    	    ERROR("Activation id %d NOT supported \n", activation_id);
+    	  }
+    	  break;
+    	}
+    
+    	void* pool_out;
+
+    	if (pool_size > 0) {
+    	  switch (pool_id) {
+    	  case 0:
+    	    {
+    	      // If we remove the asserts, we can have all cases handled by a single call
+    	      CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MAX) &&
+    			    "Expected POOL_MAX in provided Conv layer configuration");
+    	      pool_out =
+    		handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+    						       activation_out, pool_id,
+    						       pool_size, pool_size, 0, 0,
+    						       pool_size, pool_size);
+    	    }
+    	    break;
+    	  case 1:
+    	    {
+    	      CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MEAN) &&
+    			    "Expected POOL_MEAN in provided Conv layer configuration");
+    	      pool_out =
+    		handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+    						       activation_out, pool_id,
+    						       pool_size, pool_size, 0, 0,
+    						       pool_size, pool_size);
+    	    }
+    	    break;
+    	  case 2:
+    	    {
+    	      CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
+    			    "Expected POOL_MIN in provided Conv layer configuration");
+    	      pool_out =
+    		handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+    						       activation_out, pool_id,
+    						       pool_size, pool_size, 0, 0,
+    						       pool_size, pool_size);
+    	    }
+    	    break;
+    	  default:
+    	    {
+    	      ERROR("Pool id %d NOT supported \n", pool_id);
+    	    }
+    	    break;
+    	  }
+    	} else {
+    	  pool_out = activation_out;
+    	}
+    	return pool_out;
+  } else {
+    ERROR("Unsupported Configuration");
+    abort();
   }
 
   return NULL;
@@ -180,11 +333,12 @@ void *wrapper_ConvLayer2(
     // NOTE: out_min, out_max are only relevant for ClippedRelu
     float out_min, float out_max) {
 
-  INFO("*** Conv Layer \n");
+  INFO("*** ------Conv Layer \n");
 
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+ INFO("HERE\n");
   if (NodeConf->isGPUNodeConfiguration()) {
-    DEBUG("GPU Configuration for ConvLayer\n");
+    INFO("GPU Configuration for ConvLayer\n");
     // Mapped to GPU - get a GPU node configuration
     GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
 
@@ -306,7 +460,136 @@ void *wrapper_ConvLayer2(
       pool_out = activation_out;
     }
     return pool_out;
-  } else {
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      INFO("CPU Configuration for ConvLayer\n");
+      // Mapped to CPU - get a CPU node configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+                  std::vector< std::pair<CPUNodeConfiguration::APPROX,
+                                 int> > > > &ApproxChoices =
+                                                  CPUConf->getApproxChoices();
+
+      // Check for convolution as first operation
+      CUSTOM_ASSERT((ApproxChoices.size() >= 1) &&
+             (ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::CONV) &&
+            "Incorrect number/type of operations in provided Conv layer configuration");
+
+      void* conv_out = handleTensorConvApproximationTuples_CPU(ApproxChoices[0].second,
+                                         input, filter, conv_pad_h, conv_pad_w,
+                                         conv_stride_h, conv_stride_w);
+
+      void* add_out;
+      if (bias != NULL) {
+        // Check for add as second operation
+        CUSTOM_ASSERT((ApproxChoices.size() >= 2) &&
+                  (ApproxChoices[1].first == CPUNodeConfiguration::TENSOR_OP::ADD) &&
+                  "Incorrect number/type of operations in provided Conv layer configuration");
+        add_out = handleTensorAddApproximationTuples_CPU(ApproxChoices[1].second,
+                                     conv_out, bias);
+      } else {
+        add_out = conv_out;
+      }
+
+      void* activation_out;
+      switch (activation_id) {
+      case -1:
+       { // No activation
+         INFO("No activation Function\n");
+         activation_out = add_out;
+       }
+       break;
+       case 0:
+         { // TanH activation
+           CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+                         (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::TANH) &&
+                         "Incorrect number/type of operations in provided Conv layer configuration");
+           activation_out = handleTensorTanhApproximationTuples_CPU(ApproxChoices[2].second,
+                                            add_out);
+         }
+         break;
+       case 1:
+         { // ReLU activation
+           CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+                         (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::RELU) &&
+                          "Incorrect number/type of operations in provided Conv layer configuration");
+           activation_out = handleTensorReluApproximationTuples_CPU(ApproxChoices[2].second,
+                                            add_out);
+         }
+         break;
+       case 2:
+         { // Clipped ReLU activation
+           CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
+                         (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) &&
+                         "Incorrect number/type of operations in provided Conv layer configuration");
+           activation_out =
+                     handleTensorClippedReluApproximationTuples_CPU(ApproxChoices[2].second,
+                                                add_out, out_min, out_max);
+         }
+         break;
+       default:
+         {
+           ERROR("Activation id %d NOT supported \n", activation_id);
+         }
+         break;
+       }
+
+       void* pool_out;
+
+       if (pool_size_v > 0) {
+         switch (pool_id) {
+         case 0:
+           {
+             // If we remove the asserts, we can have all cases handled by a single call
+             CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MAX) &&
+                    "Expected POOL_MAX in provided Conv layer configuration");
+
+             pool_out = handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+                                    activation_out, pool_id,
+                                    pool_size_v, pool_size_h,
+                                    pool_pad_v, pool_pad_h,
+                                    pool_stride_v, pool_stride_h);
+           }
+           break;
+         case 1:
+           {
+             CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MEAN) &&
+                    "Expected POOL_MEAN in provided Conv layer configuration");
+
+             // FIXIT: POOL_MEAN still needs fixing
+             pool_out =
+                handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+                            activation_out, pool_id,
+                            pool_size_v, pool_size_h,
+                            0, 0,
+                            pool_size_v, pool_size_h);
+           }
+           break;
+         case 2:
+           {
+             CUSTOM_ASSERT((ApproxChoices.back().first == CPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
+                    "Expected POOL_MIN in provided Conv layer configuration");
+             // FIXIT: POOL_MIN still needs fixing
+             pool_out =
+                handleTensorPoolingApproximationTuples_CPU(ApproxChoices.back().second,
+                            activation_out, pool_id,
+                            pool_size_v, pool_size_h, 0, 0,
+                            pool_size_v, pool_size_h);
+           }
+           break;
+         default:
+           {
+             ERROR("Pool id %d NOT supported \n", pool_id);
+           }
+           break;
+         }
+       } else {
+         pool_out = activation_out;
+       }
+      return pool_out;
+  } else {
     ERROR("Unsupported Configuration");
     abort();
   }
@@ -386,7 +669,74 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights,
     } break;
     }
     return activation_out;
-  } else {
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+    	DEBUG("CPU Configuration for FCLayer\n");
+    	// Mapped to CPU - get a CPU node configuration
+    	CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+    
+    	std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+    				std::vector< std::pair<CPUNodeConfiguration::APPROX,
+    						       int> > > > &ApproxChoices =
+    	  CPUConf->getApproxChoices();
+    
+    	// Approximation choices must be for a FC wrapper operation
+    	CUSTOM_ASSERT((ApproxChoices.size() == 2 || ApproxChoices.size() == 3) &&
+    		      ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::MUL &&
+    		      ApproxChoices[1].first == CPUNodeConfiguration::TENSOR_OP::ADD &&
+    		      "Invalid configuration generated for FC layer wrapper operation");
+    
+    	void* gemm_out = handleTensorMulApproximationTuples_CPU(ApproxChoices[0].second,
+    							    input, weights);
+    	void* add_out = handleTensorAddApproximationTuples_CPU(ApproxChoices[1].second,
+    							   gemm_out, bias);
+    
+    	void* activation_out;
+    	switch (activation_id) {
+    	case -1:
+    	  { // No activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() == 2) &&
+    			  "Incorrect number of operations in provided FC layer configuration");
+    	    INFO("No activation Function\n");
+    	    activation_out = add_out;
+    	  }
+    	  break;
+    	case 0:
+    	  { // TanH activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::TANH) &&
+    			  "Incorrect number/type of operations in provided FC layer configuration");
+    	    activation_out = handleTensorTanhApproximationTuples_CPU(ApproxChoices[2].second,
+    								 add_out);
+    	  }
+    	  break;
+    	case 1:
+    	  { // ReLU activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::RELU) &&
+    			  "Incorrect number/type of operations in provided FC layer configuration");
+    	    activation_out = handleTensorReluApproximationTuples_CPU(ApproxChoices[2].second,
+    								 add_out);
+    	  }
+    	  break;
+    	case 2:
+    	  { // Clipped ReLU activation
+    	    CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
+    			  (ApproxChoices[2].first == CPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) &&
+    			  "Incorrect number/type of operations in provided FC layer configuration");
+    	    activation_out =
+    	      handleTensorClippedReluApproximationTuples_CPU(ApproxChoices[2].second,
+    							 add_out, out_min, out_max);
+    	  }
+    	  break;
+    	default:
+    	  {
+    	    ERROR("Activation id %d NOT supported \n", activation_id);
+    	  }
+    	  break;
+    	}
+    	return activation_out;
+  } else {
     ERROR("Unsupported Configuration");
     abort();
   }
@@ -398,66 +748,152 @@ void *wrapper_tensorRelu(const char *hpvm_node_id, void *input_ptr) {
 
   INFO("*** Relu Operation \n");
 
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+
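+  // Dispatch on the node's mapping: GPU configurations use the GPU approximation
+  // handlers, CPU configurations the corresponding *_CPU handlers.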
+  if (NodeConf->isGPUNodeConfiguration()) {
+
+      // Only mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf =
+          (GPUNodeConfiguration *)NodeConf;
 
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices = GPUConf->getApproxChoices();
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices = GPUConf->getApproxChoices();
 
-  // Approximation choices must be for a relu operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::RELU &&
-      "Invalid configuration generated for tensor relu wrapper operation");
+      // Approximation choices must be for a relu operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::RELU &&
+          "Invalid configuration generated for tensor relu wrapper operation");
+
+      return handleTensorReluApproximationTuples(ApproxChoices[0].second,
+                                                 input_ptr);
+      
+    } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("ReLU operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a relu operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::RELU &&
+      	  "Invalid configuration generated for tensor relu wrapper operation");
+
+      return handleTensorReluApproximationTuples_CPU(ApproxChoices[0].second,
+					         input_ptr);
+    } else {
+      ERROR("Unsupported Configuration");
+      abort();
+    }
+
+    return NULL;
 
-  return handleTensorReluApproximationTuples(ApproxChoices[0].second,
-                                             input_ptr);
 }
 
 void *wrapper_tensorClippedRelu(const char *hpvm_node_id, void *input_ptr,
                                 float out_min, float out_max) {
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices = GPUConf->getApproxChoices();
-
-  // Approximation choices must be for a relu operation
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-                ApproxChoices[0].first ==
-                    GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU &&
-                "Invalid configuration generated for tensor clipped relu "
-                "wrapper operation");
-
-  return handleTensorClippedReluApproximationTuples(
-      ApproxChoices[0].second, input_ptr, out_min, out_max);
+
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+  if (NodeConf->isGPUNodeConfiguration()) {
+
+      // mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf =
+          (GPUNodeConfiguration *)NodeConf;
+
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices = GPUConf->getApproxChoices();
+
+      // Approximation choices must be for a relu operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+                    ApproxChoices[0].first ==
+                        GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU &&
+                    "Invalid configuration generated for tensor clipped relu "
+                    "wrapper operation");
+
+      return handleTensorClippedReluApproximationTuples(
+          ApproxChoices[0].second, input_ptr, out_min, out_max);
+      
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+        DEBUG("Clipped ReLU operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a clipped relu operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU &&
+      	  "Invalid configuration generated for tensor clipped relu wrapper operation");
+
+      return handleTensorClippedReluApproximationTuples_CPU(ApproxChoices[0].second,
+						        input_ptr, out_min, out_max);
+
+
+  } else {
+      ERROR("Unsupported Configuration");
+      abort();
+  }
+  return NULL;
+
 }
 
 void *wrapper_tensorTanh(const char *hpvm_node_id, void *input_ptr) {
   //  return tensorTanh(input_ptr);
 
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices = GPUConf->getApproxChoices();
+  if (NodeConf->isGPUNodeConfiguration()) {
+      GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
 
-  // Approximation choices must be for a tanh operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::TANH &&
-      "Invalid configuration generated for tensor tanh wrapper operation");
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices = GPUConf->getApproxChoices();
+
+      // Approximation choices must be for a tanh operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::TANH &&
+          "Invalid configuration generated for tensor tanh wrapper operation");
+
+      return handleTensorTanhApproximationTuples(ApproxChoices[0].second,
+                                                 input_ptr);
+      
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("TanH operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a tanh operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::TANH &&
+      	  "Invalid configuration generated for tensor tanh wrapper operation");
+
+      return handleTensorTanhApproximationTuples_CPU(ApproxChoices[0].second,
+					         input_ptr);
+    } else {
+      ERROR("Unsupported Configuration");
+      abort();
+    }
+
+    return NULL;
 
-  return handleTensorTanhApproximationTuples(ApproxChoices[0].second,
-                                             input_ptr);
 }
 
 void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr,
@@ -466,55 +902,111 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr,
 
   INFO("*** BatchNorm Operation \n");
 
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices =
+  
+  if (NodeConf->isGPUNodeConfiguration()) {
+      // mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf =
+          (GPUNodeConfiguration *)NodeConf;
+
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices =
+
+              GPUConf->getApproxChoices();
 
-          GPUConf->getApproxChoices();
+      // printf("*** BatchNorm \n ApproxChoice = %d \n  BatchNorm = %d \n CONV = %d
+      // \n", ApproxChoices[0].first,
+      //	       GPUNodeConfiguration::TENSOR_OP::BATCHNORM,
+      //	       GPUNodeConfiguration::TENSOR_OP::CONV);
 
-  // printf("*** BatchNorm \n ApproxChoice = %d \n  BatchNorm = %d \n CONV = %d
-  // \n", ApproxChoices[0].first,
-  //	       GPUNodeConfiguration::TENSOR_OP::BATCHNORM,
-  //	       GPUNodeConfiguration::TENSOR_OP::CONV);
+      // Approximation choices must be for a batchnorm operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::BATCHNORM &&
+          "Invalid configuration generated for tensor batchnorm wrapper operation");
+
+      return handleTensorBatchNormApproximationTuples(
+          ApproxChoices[0].second, input_ptr, gamma_ptr, beta_ptr, mean_ptr,
+          variance_ptr, epsilon);
+
+    } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("BatchNorm operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a batchnorm operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::BATCHNORM &&
+      	  "Invalid configuration generated for tensor batchnorm wrapper operation");
+
+      return handleTensorBatchNormApproximationTuples_CPU(ApproxChoices[0].second,
+						      input_ptr, gamma_ptr, beta_ptr,
+						      mean_ptr, variance_ptr, epsilon);
+    } else {
+      ERROR("Unsupported Configuration");
+      abort();
+    }
 
-  // Approximation choices must be for a batchnorm operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::BATCHNORM &&
-      "Invalid configuration generated for tensor batchnorm wrapper operation");
+    return NULL;
 
-  return handleTensorBatchNormApproximationTuples(
-      ApproxChoices[0].second, input_ptr, gamma_ptr, beta_ptr, mean_ptr,
-      variance_ptr, epsilon);
 }
 
 void *wrapper_tensorAdd(const char *hpvm_node_id, void *input_ptr,
                         void *bias_ptr) {
 
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+
+  if (NodeConf->isGPUNodeConfiguration()) {
 
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices =
+      // mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf =
+          (GPUNodeConfiguration *)NodeConf;
 
-          GPUConf->getApproxChoices();
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices =
 
-  // Approximation choices must be for an add operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::ADD &&
-      "Invalid configuration generated for tensor add wrapper operation");
+              GPUConf->getApproxChoices();
 
-  return handleTensorAddApproximationTuples(ApproxChoices[0].second, input_ptr,
-                                            bias_ptr);
+      // Approximation choices must be for an add operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::ADD &&
+          "Invalid configuration generated for tensor add wrapper operation");
+
+      return handleTensorAddApproximationTuples(ApproxChoices[0].second, input_ptr,
+                                                bias_ptr);
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("Add operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for an add operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::ADD &&
+      	  "Invalid configuration generated for tensor add wrapper operation");
+
+      return handleTensorAddApproximationTuples_CPU(ApproxChoices[0].second,
+				            input_ptr, bias_ptr);
+  } else {
+      ERROR("Unsupported Configuration");
+      abort();
+  }
+  return NULL;
 }
 
 void *wrapper_tensorPooling(const char *hpvm_node_id, void *input_ptr,
@@ -525,37 +1017,73 @@ void *wrapper_tensorPooling(const char *hpvm_node_id, void *input_ptr,
 
   INFO("*** TensorPooling Operation \n");
 
-  //  return tensorPooling(input_ptr, poolFunction, window_height, window_width,
-  //		       vertical_pad, horizontal_pad, vertical_stride,
-  // horizontal_stride);
-
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices =
-
-          GPUConf->getApproxChoices();
-
-  // Approximation choices must be for a single operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      "Invalid configuration generated for tensor pool wrapper operation");
-  enum GPUNodeConfiguration::TENSOR_OP top = ApproxChoices[0].first;
-  // Approximation choices must be for a pool operation
-  CUSTOM_ASSERT(
-      (top == GPUNodeConfiguration::TENSOR_OP::POOL_MAX ||
-       top == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN ||
-       top == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
-      "Invalid configuration generated for tensor pool wrapper operation");
-
-  return handleTensorPoolingApproximationTuples(
-      ApproxChoices[0].second, input_ptr, poolFunction, window_height,
-      window_width, vertical_pad, horizontal_pad, vertical_stride,
-      horizontal_stride);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+
+  if (NodeConf->isGPUNodeConfiguration()) {
+
+      //  return tensorPooling(input_ptr, poolFunction, window_height, window_width,
+      //		       vertical_pad, horizontal_pad, vertical_stride,
+      // horizontal_stride);
+
+      // Only mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
+
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices =
+
+              GPUConf->getApproxChoices();
+
+      // Approximation choices must be for a single operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          "Invalid configuration generated for tensor pool wrapper operation");
+      enum GPUNodeConfiguration::TENSOR_OP top = ApproxChoices[0].first;
+      // Approximation choices must be for a pool operation
+      CUSTOM_ASSERT(
+          (top == GPUNodeConfiguration::TENSOR_OP::POOL_MAX ||
+           top == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN ||
+           top == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
+          "Invalid configuration generated for tensor pool wrapper operation");
+
+      return handleTensorPoolingApproximationTuples(
+          ApproxChoices[0].second, input_ptr, poolFunction, window_height,
+          window_width, vertical_pad, horizontal_pad, vertical_stride,
+          horizontal_stride);
+      
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("Pool operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a single operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+  		  "Invalid configuration generated for tensor pool wrapper operation");
+      enum CPUNodeConfiguration::TENSOR_OP top = ApproxChoices[0].first;
+      // Approximation choices must be for a pool operation
+      CUSTOM_ASSERT((top == CPUNodeConfiguration::TENSOR_OP::POOL_MAX  ||
+  		   top == CPUNodeConfiguration::TENSOR_OP::POOL_MEAN ||
+  		   top == CPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
+  		  "Invalid configuration generated for tensor pool wrapper operation");
+
+      return handleTensorPoolingApproximationTuples_CPU(ApproxChoices[0].second,
+						    input_ptr, poolFunction,
+						    window_height, window_width,
+						    vertical_pad, horizontal_pad,
+						    vertical_stride, horizontal_stride);
+    } else {
+      ERROR("Unsupported Configuration");
+      abort();
+    }
+
+    return NULL;
+
 }
 
 void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input,
@@ -563,47 +1091,104 @@ void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input,
                                      int horizontal_pad, int vertical_stride,
                                      int horizontal_stride, int conv_mode,
                                      int conv_groups) {
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices = GPUConf->getApproxChoices();
-
-  // Approximation choices must be for a group_conv operation
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-                ApproxChoices[0].first ==
-                    GPUNodeConfiguration::TENSOR_OP::GROUP_CONV &&
-                "Invalid configuration generated for tensor group_conv wrapper "
-                "operation");
-
-  return handleTensorGroupConvApproximationTuples(
-      ApproxChoices[0].second, input, filter, vertical_pad, horizontal_pad,
-      vertical_stride, horizontal_stride, conv_mode, conv_groups);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+
+  if (NodeConf->isGPUNodeConfiguration()) {
+
+      // Mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf =
+          (GPUNodeConfiguration *)NodeConf;
+
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices = GPUConf->getApproxChoices();
+
+      // Approximation choices must be for a group_conv operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+                    ApproxChoices[0].first ==
+                        GPUNodeConfiguration::TENSOR_OP::GROUP_CONV &&
+                    "Invalid configuration generated for tensor group_conv wrapper "
+                    "operation");
+
+      return handleTensorGroupConvApproximationTuples(
+          ApproxChoices[0].second, input, filter, vertical_pad, horizontal_pad,
+          vertical_stride, horizontal_stride, conv_mode, conv_groups);
+      
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("Group Convolution operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a group_conv operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+  		  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::GROUP_CONV &&
+  		  "Invalid configuration generated for tensor group_conv wrapper operation");
+
+      return handleTensorGroupConvApproximationTuples_CPU(ApproxChoices[0].second,
+						      input, filter,
+						      vertical_pad, horizontal_pad,
+						      vertical_stride, horizontal_stride,
+						      conv_mode, conv_groups);
+  } else {
+      ERROR("Unsupported Configuration");
+      abort();
+  }
+  return NULL;
+
 }
 
 void *wrapper_tensorSoftmax(const char *hpvm_node_id, void *input_ptr) {
   //  return tensorSoftmax(input_ptr);
 
-  // Only mapped to GPU - get a GPU configuration
-  GPUNodeConfiguration *GPUConf =
-      (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
+  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
+  if (NodeConf->isGPUNodeConfiguration()) {
+
+      // Mapped to GPU - get a GPU configuration
+      GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
 
-  std::vector<
-      std::pair<GPUNodeConfiguration::TENSOR_OP,
-                std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
-      &ApproxChoices = GPUConf->getApproxChoices();
+      std::vector<
+          std::pair<GPUNodeConfiguration::TENSOR_OP,
+                    std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
+          &ApproxChoices = GPUConf->getApproxChoices();
 
-  // Approximation choices must be for a softmax operation
-  CUSTOM_ASSERT(
-      ApproxChoices.size() == 1 &&
-      ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::SOFTMAX &&
-      "Invalid configuration generated for tensor softmax wrapper operation");
+      // Approximation choices must be for a softmax operation
+      CUSTOM_ASSERT(
+          ApproxChoices.size() == 1 &&
+          ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::SOFTMAX &&
+          "Invalid configuration generated for tensor softmax wrapper operation");
+
+      return handleTensorSoftmaxApproximationTuples(ApproxChoices[0].second,
+                                                    input_ptr);
+      
+  } else if (NodeConf->isCPUNodeConfiguration()) {
+      DEBUG("SoftMax operation: CPU Configuration\n");
+      // Mapped to CPU - get a CPU configuration
+      CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf;
+
+      std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP,
+      		    std::vector< std::pair<CPUNodeConfiguration::APPROX,
+      					   int> > > > &ApproxChoices =
+        CPUConf->getApproxChoices();
+
+      // Approximation choices must be for a softmax operation
+      CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
+      	  ApproxChoices[0].first == CPUNodeConfiguration::TENSOR_OP::SOFTMAX &&
+      	  "Invalid configuration generated for tensor softmax wrapper operation");
+
+      return handleTensorSoftmaxApproximationTuples_CPU(ApproxChoices[0].second, input_ptr);
+  } else {
+      ERROR("Unsupported Configuration");
+      abort();
+  }
+  return NULL;
 
-  return handleTensorSoftmaxApproximationTuples(ApproxChoices[0].second,
-                                                input_ptr);
 }
 
 void *tensor_set_node_id(unsigned int node_id) {