diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..3cf770ebfe0c29b3f4ade39c89430884a5e8043f
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile
@@ -0,0 +1,31 @@
+DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
+CC = $(LLVM_SRC_ROOT)/../build/bin/clang++
+OPT = $(LLVM_SRC_ROOT)/../build/bin/opt
+LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
+
+SRC_DIR = src
+BUILD_DIR = build
+APP = fcl
+
+TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
+
+CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -fno-exceptions -ffast-math -std=c++11 -O3 
+
+TARGET = $(BUILD_DIR)/$(APP).visc
+SOURCES = $(SRC_DIR)/$(APP).cpp
+#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
+.PRECIOUS: $(BUILD_DIR)/$(APP).ll 
+default: $(BUILD_DIR) $(TARGET)
+
+
+$(BUILD_DIR)/%.ll: $(SOURCES)
+	$(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $<
+
+$(BUILD_DIR)/%.visc: $(BUILD_DIR)/%.ll
+	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce -visc-timers-gen $< -S -o $@ -debug
+
+$(BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -rf $(BUILD_DIR)
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49e708d51196522f711544afdf9bb6997f76ee78
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp
@@ -0,0 +1,110 @@
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <cinttypes>
+#include <visc.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+using namespace std;
+
+void tensorMul(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+    __visc__hint(visc::DEVICE);
+    __visc__attributes(2, t1, t2, 0);
+
+    // Call to visc tensor mul intrinsic
+    // It will return a void * that we cast to Tensor_t
+    // Not in place operation
+    void *r = __visc__tensor_mul(t1, t2);
+    //struct ret_t res;
+    //res.tensor = r;
+    __visc__return(r, 0);
+}
+
+void tensorAdd(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+    __visc__hint(visc::DEVICE);
+    __visc__attributes(2, t1, t2, 0);
+
+    // Call to visc tensor add intrinsic
+    // It will return a void * that we cast to Tensor_t
+    // In place operation
+    void* r = __visc__tensor_add(t1, t2);
+    __visc__return(r, 0);
+}
+
+void root(void *w, size_t bytesw, void *x, size_t bytesx, void *b, size_t bytesb) {
+    __visc__hint(visc::CPU_TARGET);
+    __visc__attributes(3, w, x, b, 0);
+
+    void *nodeMul = __visc__createNode(tensorMul);
+    void *nodeAdd = __visc__createNode(tensorAdd);
+
+    // node, src, dst, stream
+    __visc__bindIn(nodeMul, 0, 0, 0);
+    __visc__bindIn(nodeMul, 1, 1, 0);
+    __visc__bindIn(nodeMul, 2, 2, 0);
+    __visc__bindIn(nodeMul, 3, 3, 0);
+
+    // node, node, type, src, dst, stream
+    __visc__edge(nodeMul, nodeAdd, 1, 0, 0, 0);
+    __visc__edge(nodeMul, nodeAdd, 1, 1, 1, 0);
+
+    __visc__bindIn(nodeAdd, 4, 2, 0);
+    __visc__bindIn(nodeAdd, 5, 3, 0);
+
+    __visc__bindOut(nodeAdd, 0, 0, 0);
+    __visc__bindOut(nodeAdd, 1, 1, 0);
+
+}
+
+
+// Return type for the nodes
+struct ret_t {
+    void *tensor;
+    size_t bytes;
+};
+
+typedef struct __attribute__((__packed__)) {
+    void *w;
+    size_t bytesw;
+    void *x;
+    size_t bytesx;
+    void *b;
+    size_t bytesb;
+    struct ret_t r;
+}
+RootIn;
+
+int main() {
+
+    void *w;
+    void *x;
+    void *b;
+    int test_batch_size = 10000;
+    x = readInputTensor("t10k-images-idx3-ubyte", float_type,
+                        test_batch_size, 1, 28, 28);
+    w = readTrainedWeights("./model_params/lenet_params/ip1.bias.bin", float_type, 1, 1, 800, 500);
+    b = readTrainedWeights("./model_params/lenet_params/ip1.bin", float_type, 1, 1, 1, 500);
+
+    __visc__init();
+
+    RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
+    args->w = w;
+    args->bytesw = 0;
+    args->x = x;
+    args->bytesx = 0;
+    args->b = b;
+    args->bytesb = 0;
+
+    void *dfg = __visc__launch(0, root, (void *)args);
+
+    __visc__wait(dfg);
+
+    void *r = static_cast<RootIn*>(dfg)->r.tensor;
+    hpvm_request_tensor(r);
+
+    __visc__cleanup();
+    return 0;
+}
+
+
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h
new file mode 100644
index 0000000000000000000000000000000000000000..3479a94abec9d6357edc26e4507ec80f8b060acb
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h
@@ -0,0 +1,38 @@
+
+#ifndef TYPES_HEADER
+#define TYPES_HEADER
+
+
+struct Dimension_t{
+  int num_dims;
+  size_t* dim_sizes;
+};
+
+
+struct Tensor_t{
+  int tensor_id; // used for indexing (in the tensor runtime)
+  int data_type; // {float_type, double_type, half_type, int_type}
+  int data_format; // {nchw, nhwc}
+  void* host_data;
+  size_t num_elems; // Total elements
+  size_t size_in_bytes; // Total size in bytes
+  struct Dimension_t dims;
+};
+
+
+enum Tensor_type_t{
+  float_type,
+  double_type,
+  half_type,
+  int_type
+};
+
+
+// NOTE: Currently only NCHW is supported due to limited cuDNN support
+enum Tensor_format_t{
+  nchw,
+  nhwc 
+};
+
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..de6da3d3a26b2d6a41a29f49fcfe9a5dd30074cd
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
@@ -0,0 +1,24 @@
+#include <tensorTypes.h>
+
+// Header guards
+#ifndef UTILS_HEADER
+#define UTILS_HEADER
+
+void printTensorInfo(void* tensor_ptr);
+void dumpWeightsToFile(char* file_name, void* weights_ptr);
+void fillTensorWithOnes(void* tensor_ptr);
+void fillWithOnesAndTwos(void* tensor_ptr);
+void fillTensorWithNegOnes(void* tensor_ptr);
+void fillTensorVals(void* tensor_ptr);
+void printTensorValues(void* tensor_ptr);
+void printTensorDims(void* tensor_ptr);
+void compareTensors(void* tensor1_ptr, void* tensor2_ptr);
+void compareValues(void* tensor_ptr, float* data, size_t num_elems);
+void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size,
+		      int dim3_size, int dim4_size);
+struct Tensor* readTrainedWeights(char* file_name, int data_type, int dim1_size, int dim2_size,
+				  int dim3_size, int dim4_size);
+uint8_t* readLabels(char* labels_file, int num_labels);
+void computeAccuracy(char* labels_file, int num_labels, void* result_ptr);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe3a25233f3023b13d7e6cb799db497305b78bb4
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
@@ -0,0 +1,112 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#ifndef DEVICE
+#define DEVICE GPU_TARGET
+#endif
+
+#include "llvm/SupportVISC/VISCHint.h"
+
+#ifdef __cplusplus
+extern "C" {
+void __visc__hint(visc::Target);
+void __visc__wait(void*);
+#else
+void __visc__hint(enum Target);
+void __visc__wait(unsigned);
+#endif
+
+#ifdef __cplusplus
+void* __visc__node(...);
+void* __visc__createNode(...);
+void* __visc__createNode1D(...);
+void* __visc__createNode2D(...);
+void* __visc__createNode3D(...);
+void __visc__return(...);
+#endif
+
+void __visc__attributes(unsigned, ...);
+void __visc__init();
+void __visc__cleanup();
+
+void __visc__bindIn(void*, unsigned, unsigned, unsigned);
+void __visc__bindOut(void*, unsigned, unsigned, unsigned);
+void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned);
+void __visc__push(void*, void*);
+void* __visc__pop(void*);
+void* __visc__launch(unsigned, ...);
+
+void* __visc__getNode();
+void* __visc__getParentNode(void*);
+void __visc__barrier();
+void* __visc__malloc(long);
+long __visc__getNodeInstanceID_x(void*);
+long __visc__getNodeInstanceID_y(void*);
+long __visc__getNodeInstanceID_z(void*);
+long __visc__getNumNodeInstances_x(void*);
+long __visc__getNumNodeInstances_y(void*);
+long __visc__getNumNodeInstances_z(void*);
+
+// Atomic
+// signed int
+int __visc__atomic_cmpxchg(int*, int, int);
+int __visc__atomic_add(int*, int);
+int __visc__atomic_sub(int*, int);
+int __visc__atomic_xchg(int*, int);
+int __visc__atomic_inc(int*);
+int __visc__atomic_dec(int*);
+int __visc__atomic_min(int*, int);
+int __visc__atomic_max(int*, int);
+int __visc__atomic_umax(int*, int);
+int __visc__atomic_umin(int*, int);
+int __visc__atomic_and(int*, int);
+int __visc__atomic_or(int*, int);
+int __visc__atomic_xor(int*, int);
+
+// Special Func
+float __visc__floor(float);
+float __visc__rsqrt(float);
+float __visc__sqrt(float);
+float __visc__sin(float);
+float __visc__cos(float);
+// unsigned int
+//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned);
+//unsigned __visc__atomic_add(unsigned*, unsigned);
+//unsigned __visc__atomic_sub(unsigned*, unsigned);
+//unsigned __visc__atomic_xchg(unsigned*, unsigned);
+//unsigned __visc__atomic_inc(unsigned*);
+//unsigned __visc__atomic_dec(unsigned*);
+//unsigned __visc__atomic_min(unsigned*, unsigned);
+//unsigned __visc__atomic_max(unsigned*, unsigned);
+//unsigned __visc__atomic_and(unsigned*, unsigned);
+//unsigned __visc__atomic_or(unsigned*, unsigned);
+//unsigned __visc__atomic_xor(unsigned*, unsigned);
+
+/*
+ * ApproxHPVM specific function calls
+ */
+
+void* __visc__tensor_add(void*, void*);
+void* __visc__tensor_mul(void*, void*);
+
+#include <unistd.h>
+
+long get_global_id(int);
+long get_group_id(int);
+long get_local_id(int);
+long get_local_size(int);
+
+
+void llvm_visc_track_mem(void*, size_t);
+void llvm_visc_untrack_mem(void*);
+void llvm_visc_request_mem(void*, size_t);
+void hpvm_request_tensor(void*);
+#ifdef __cplusplus
+}
+#endif
+