diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/pipeline.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/pipeline.cc
new file mode 100644
index 0000000000000000000000000000000000000000..310e8452ff99b42b21a71c21a5481d816469546a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/pipeline.cc
@@ -0,0 +1,150 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+// Set by main() when any command-line argument is given; enables the FIFO-synchronized multi-run mode.
+bool Opentuner_run = false;
+
+
+/* Image-filter pipeline benchmark: Gaussian -> Outline -> Motion Blur -> Emboss. */
+// Size in bytes of every message exchanged with the tuner over /tmp/myfifo.
+enum { kFifoMsgBytes = 80 };
+
+// Reads one control message from /tmp/myfifo before a run. Aborts if the FIFO
+// cannot be opened or read, or if the message is "stop_run".
+static void waitForTunerSignal(){
+  int fd = open("/tmp/myfifo", O_RDONLY);
+  if(fd == -1){
+    printf("Invalid descriptor \n");
+    abort();
+  }
+  // Zero-filled and larger than the read size, so strcmp always sees a terminator.
+  char msg[100] = {0};
+  ssize_t num_read = read(fd, msg, kFifoMsgBytes);
+  close(fd);
+  if(num_read < 0){
+    printf("Failed to read from FIFO \n");
+    abort();
+  }
+  if(strcmp(msg, "stop_run") == 0){
+    abort();
+  }
+}
+
+// Signals the end of a run by writing a zero-padded kFifoMsgBytes message to
+// /tmp/myfifo. Aborts if the FIFO cannot be opened or written.
+static void notifyTunerRunDone(){
+  int fd = open("/tmp/myfifo", O_WRONLY);
+  if(fd == -1){
+    printf("Invalid descriptor \n");
+    abort();
+  }
+  // A full-size buffer keeps the message length at 80 bytes without reading
+  // past the end of the much shorter string literal.
+  char msg[kFifoMsgBytes] = "completed***!\n";
+  ssize_t num_written = write(fd, msg, sizeof(msg));
+  close(fd);
+  if(num_written < 0){
+    printf("Failed to write to FIFO \n");
+    abort();
+  }
+}
+
+/* Runs the four-stage filter pipeline on the input images and compares the
+ * result with the golden output. Executes once, or up to 1000000 times when
+ * Opentuner_run is set, exchanging a FIFO message before and after each run. */
+void testLenetTanh(){
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* Pipeline: Gaussian - Outline - Motion Blur - Emboss ********** \n");
+
+  int test_batch_size = 9145;
+  int H = 240;
+  int W = 300;
+
+  void* golden_output = readInputTensor("../pipeline/golden_output/caltech-gaussian-outline-motionblur-emboss.bin",
+                                        CUDNN_DATA_FLOAT,
+                                        test_batch_size, 1, H, W);
+  void* input = readInputTensor("../pipeline/datasets/caltech101_255_float32.bin",
+                                CUDNN_DATA_FLOAT,
+                                test_batch_size, 1, H, W);
+
+  // NOTE: Filter descriptors do NOT have batch size
+  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+  void* gauss_filter = readTrainedWeights("../pipeline/filters/GaussianFilter.bin",
+                                          float_type, 1, 1, 9, 9);
+  void* outline_filter = readTrainedWeights("../pipeline/filters/OutlineFilter.bin",
+                                            float_type, 1, 1, 3, 3);
+  // Still loaded, but no stage below uses the sharpen filter.
+  void* sharpen_filter = readTrainedWeights("../pipeline/filters/SharpenFilter.bin",
+                                            float_type, 1, 1, 3, 3);
+  void* motionblur_filter = readTrainedWeights("../pipeline/filters/MotionblurFilter.bin",
+                                               float_type, 1, 1, 9, 9);
+  void* emboss_filter = readTrainedWeights("../pipeline/filters/EmbossFilter.bin",
+                                           float_type, 1, 1, 5, 5);
+  void* emboss_bias = readTrainedWeights("../pipeline/filters/EmbossBias.bin",
+                                         float_type, 1, 1, 1, 1);
+
+  clearTensorMap();
+
+  for(int i = 0; i < total_runs; i++){
+    if(Opentuner_run){
+      waitForTunerSignal();
+    }
+
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    startProfiling(); // Start power and performance profiling
+
+    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
+    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
+
+    // NOTE: 'SAME' convolution
+    void* gaussian_out = tensorConvolution(input, gauss_filter, 4, 4, 1, 1,
+                                           conv_mode, conv_precision);
+    void* outline_out = tensorConvolution(gaussian_out, outline_filter, 1, 1, 1, 1,
+                                          conv_mode, conv_precision);
+    void* motionblur_out = tensorConvolution(outline_out, motionblur_filter, 4, 4, 1, 1,
+                                             conv_mode, conv_precision);
+    void* emboss_out = tensorConvolution(motionblur_out, emboss_filter, 2, 2, 1, 1,
+                                         conv_mode, conv_precision);
+    void* result = tensorAdd(emboss_out, emboss_bias);
+
+    stopProfiling(); // End profiling and dump output to profile.txt
+
+    computeAccuracy2(golden_output, test_batch_size, result);
+    dumpAccuracyNorms();
+    freeOutputTensors();
+
+    if(Opentuner_run){
+      notifyTunerRunDone();
+    }
+  }
+}
+
+
+// Entry point: any command-line argument switches the test into OpenTuner mode.
+int main(int argc, char* argv[]){
+  // Only the presence of an argument matters; its value is never read.
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){
+    Opentuner_run = true;
+  }
+
+  llvm_hpvm_initTensorRt(0);
+  testLenetTanh();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/pipeline_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/pipeline_promise.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c271879b1ea775694ff425e042e5db9f16f9e686
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/pipeline_promise.cc
@@ -0,0 +1,145 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+// Set by main() when any command-line argument is given; enables the FIFO-synchronized multi-run mode.
+bool Opentuner_run = false;
+
+
+/* PROMISE variant of the pipeline: Gaussian -> Outline -> Motion Blur -> Emboss. */
+// Size in bytes of every message exchanged with the tuner over /tmp/myfifo.
+enum { kFifoMsgBytes = 80 };
+
+// Reads one control message from /tmp/myfifo before a run. Aborts if the FIFO
+// cannot be opened or read, or if the message is "stop_run".
+static void waitForTunerSignal(){
+  int fd = open("/tmp/myfifo", O_RDONLY);
+  if(fd == -1){
+    printf("Invalid descriptor \n");
+    abort();
+  }
+  // Zero-filled and larger than the read size, so strcmp always sees a terminator.
+  char msg[100] = {0};
+  ssize_t num_read = read(fd, msg, kFifoMsgBytes);
+  close(fd);
+  if(num_read < 0){
+    printf("Failed to read from FIFO \n");
+    abort();
+  }
+  if(strcmp(msg, "stop_run") == 0){
+    abort();
+  }
+}
+
+// Signals the end of a run by writing a zero-padded kFifoMsgBytes message to
+// /tmp/myfifo. Aborts if the FIFO cannot be opened or written.
+static void notifyTunerRunDone(){
+  int fd = open("/tmp/myfifo", O_WRONLY);
+  if(fd == -1){
+    printf("Invalid descriptor \n");
+    abort();
+  }
+  // Full-size buffer: the 80-byte write never reads past the short string literal.
+  char msg[kFifoMsgBytes] = "completed***!\n";
+  ssize_t num_written = write(fd, msg, sizeof(msg));
+  close(fd);
+  if(num_written < 0){
+    printf("Failed to write to FIFO \n");
+    abort();
+  }
+}
+
+/* Runs the four-stage filter pipeline through ConvLayer_PROMISE and compares
+ * the result with the golden output. Executes once, or up to 1000000 times when
+ * Opentuner_run is set, exchanging a FIFO message before and after each run. */
+void testPipeline(){
+  int total_runs = 1;
+  if(Opentuner_run){
+    total_runs = 1000000;
+  }
+
+  printf("********* Pipeline: Gaussian - Outline - Motion Blur - Emboss ********** \n");
+  int test_batch_size = 9145;
+  int H = 240;
+  int W = 300;
+
+  void* golden_output = readTrainedWeights("../pipeline/golden_output/caltech-gaussian-outline-motionblur-emboss.bin",
+                                           float_type, test_batch_size, 1, H, W);
+
+  clearTensorMap();
+  for(int i = 0; i < total_runs; i++){
+    // The input and every filter are re-read at the start of each run.
+    void* input = readTrainedWeights("../pipeline/datasets/caltech101_255_float32.bin",
+                                     float_type, test_batch_size, 1, H, W);
+
+    // NOTE: Filter descriptors do NOT have batch size
+    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
+    void* gauss_filter = readTrainedWeights("../pipeline/filters/GaussianFilter.bin",
+                                            float_type, 1, 1, 9, 9);
+    void* outline_filter = readTrainedWeights("../pipeline/filters/OutlineFilter.bin",
+                                              float_type, 1, 1, 3, 3);
+    // Still loaded, but no stage below uses the sharpen filter.
+    void* sharpen_filter = readTrainedWeights("../pipeline/filters/SharpenFilter.bin",
+                                              float_type, 1, 1, 3, 3);
+    void* motionblur_filter = readTrainedWeights("../pipeline/filters/MotionblurFilter.bin",
+                                                 float_type, 1, 1, 9, 9);
+    void* emboss_filter = readTrainedWeights("../pipeline/filters/EmbossFilter.bin",
+                                             float_type, 1, 1, 5, 5);
+    void* emboss_bias = readTrainedWeights("../pipeline/filters/EmbossBias.bin",
+                                           float_type, 1, 1, 1, 1);
+
+    if(Opentuner_run){
+      waitForTunerSignal();
+    }
+    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
+
+    // out min max (the 0, 255 pair before the final argument)? should we assume 0 - 255 for all filters.
+    // Will have to rerun to generate golden output
+    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gauss_filter, 0, 1, NULL, 0, 0,
+                                           4, 4, 1, 1,
+                                           0, 0, // pool? no pooling needed
+                                           0, 0, 255, 9);
+    void* outline_out = ConvLayer_PROMISE(gaussian_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
+                                          1, 1, 1, 1,
+                                          0, 0, // pool? no pooling needed
+                                          0, 0, 255, 9);
+    void* motionblur_out = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
+                                             4, 4, 1, 1,
+                                             0, 0, // pool? no pooling needed
+                                             0, 0, 255, 9);
+    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
+                                     2, 2, 1, 1,
+                                     0, 0, // pool? no pooling needed
+                                     0, 0, 255, 9);
+
+    computeAccuracy2(golden_output, test_batch_size, result);
+    freeOutputTensors();
+    if(Opentuner_run){
+      notifyTunerRunDone();
+    }
+  }
+}
+
+
+// Entry point: any command-line argument switches the test into OpenTuner mode.
+int main(int argc, char* argv[]){
+  // Only the presence of an argument matters; its value is never read.
+  const bool tuner_requested = (argc > 1);
+  if(tuner_requested){
+    Opentuner_run = true;
+  }
+
+  llvm_hpvm_initTensorRt(0);
+  testPipeline();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
+
diff --git a/llvm/projects/hpvm-tensor-rt/pipeline/dataset/.gitkeep b/llvm/projects/hpvm-tensor-rt/pipeline/dataset/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llvm/projects/hpvm-tensor-rt/pipeline/filters/.gitkeep b/llvm/projects/hpvm-tensor-rt/pipeline/filters/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llvm/projects/hpvm-tensor-rt/pipeline/golden_output/.gitkeep b/llvm/projects/hpvm-tensor-rt/pipeline/golden_output/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391