diff --git a/hpvm/projects/onnx/.gitignore b/hpvm/projects/onnx/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..2f4db75f1d4127e9537f7efdf6ed6556b5c8d93b
--- /dev/null
+++ b/hpvm/projects/onnx/.gitignore
@@ -0,0 +1,4 @@
+build/
+dist/
+onnx2hpvm.egg-info/
+.ipynb_checkpoints/
diff --git a/hpvm/projects/onnx/README.md b/hpvm/projects/onnx/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..eba07cd9d4102283362bbd021f28ff59b3755b0e
--- /dev/null
+++ b/hpvm/projects/onnx/README.md
@@ -0,0 +1,13 @@
+## Importing Conda Environment:
+
+conda env create -f env.yaml
+
+## Activate/deactivate Conda Environment
+
+conda activate onnxfront
+
+## Building and Installing Frontend for ONNX:
+
+python setup.py build
+
+python setup.py install
diff --git a/hpvm/projects/onnx/TODO.md b/hpvm/projects/onnx/TODO.md
new file mode 100644
index 0000000000000000000000000000000000000000..1ebb883f79b65b177baa5459b426bb3610f64ac0
--- /dev/null
+++ b/hpvm/projects/onnx/TODO.md
@@ -0,0 +1,43 @@
+# What kind of models we should use
+ResNet-50 -> as a start?
+BERT
+Mask R-CNN
+
+# Questions from Sudipta
+Graph optimization: operator fusions, data layout transformations
+Vectorization/tensorization
+Automatic scheduling
+Handling dynamic control flow in model graph
+Automatic differentiation
+
+# Operators used in the BERT model available in the ONNX model zoo, organized by type and number of occurrences
+
+https://github.com/onnx/models/tree/master/text/machine_comprehension/bert-squad
+(Opset 10 versions)
+
+DATA MANIPULATION
+Unsqueeze 191
+Reshape 71
+Cast 70
+Transpose 62
+Concat 56
+Identity 28
+Squeeze 7
+Slice
+Shape 5
+ConstantOfShape 1
+Gather 1
+OneHot 1
+Split 1
+
+COMPUTATION
+Mul 186
+Add 185
+MatMul 98
+Sub 62
+ReduceMean 50
+Sqrt 25
+Reciprocal 25
+Softmax 12
+Pow 12
+Tanh 12
diff --git a/hpvm/projects/onnx/env.yaml b/hpvm/projects/onnx/env.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f05a9304a4a1145abb7311a83b16d84153288fbb
--- /dev/null
+++ b/hpvm/projects/onnx/env.yaml
@@ -0,0 +1,8 @@
+name: onnxfront
+channels:
+  - defaults
+dependencies:
+  - pip
+  - python=3.8.5=h7579374_1
+  - pip:
+    - onnx==1.8.0
diff --git a/hpvm/projects/onnx/frontend/README.md b/hpvm/projects/onnx/frontend/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69ffe81f5ad6b1a57dda1a6410d4348ff616148a
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/README.md
@@ -0,0 +1,10 @@
+### How to Run
+```
+python main.py <onnx_file> <output_dir> <input_size>
+```
+Set all your config, e.g. onnx model location, input size and emit directory for generated source code, in **config.py**.
+
+### Resources
+1. [ONNX overview](https://github.com/onnx/onnx/blob/master/docs/IR.md)
+2. [ONNX operator specs](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
+3. [Conversion between models - available adapters](https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/frontend/__init__.py b/hpvm/projects/onnx/frontend/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hpvm/projects/onnx/frontend/codegen_hpvm.py b/hpvm/projects/onnx/frontend/codegen_hpvm.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd9eac233071787baa6a1224ebb00b2eae837a1b
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/codegen_hpvm.py
@@ -0,0 +1,146 @@
+from os import PathLike
+from pathlib import Path
+from typing import Dict, List, Tuple, Union
+
+import jinja2
+
+from graph_builder import DFG
+from graph_ir import DFGNode, TensorNode, WeightTensor
+
+TEMPLATE_FILE = "template_hpvm.cpp"
+loader = jinja2.FileSystemLoader(searchpath=Path(__file__).parent)
+template_env = jinja2.Environment(loader=loader, trim_blocks=True)
+template = template_env.get_template(TEMPLATE_FILE)
+
+
+class CodeGen:
+    """Base code generator: holds the DFG, I/O info and variable naming."""
+
+    def __init__(
+        self,
+        dfg: DFG,
+        output_dir: PathLike,
+        input_size: int,
+        batch_size: int = None,
+        prefix: str = None,
+    ):
+        self.dfg = dfg
+        self.var_count = 0
+        self.output_dir = Path(output_dir)
+        self.prefix = prefix
+        # Only graphs with exactly one input tensor are supported.
+        assert len(self.dfg.inputs) == 1
+        input_tensor = self.dfg.inputs[0]
+        self.input_name = input_tensor.name
+        self.input_shape = input_tensor.shape[1:]  # first dimension is dropped
+        self.input_size = input_size
+        self.batch_size = batch_size or input_size  # unset (or 0) -> input_size
+        # self.variables is a "node to our name" map
+        # Each value is (varname, bool) and the bool indicates
+        # "is root node input" or not.
+        IdenT = Union[str, int]
+        self.root_args = sorted(
+            [n for n in dfg.traverse_order if isinstance(n, TensorNode)],
+            key=lambda n: n.name,
+        )
+        self.weights = [n for n in self.root_args if isinstance(n, WeightTensor)]
+        self.variables: Dict[DFGNode, Tuple[IdenT, bool]] = {
+            f_name: (index, True) for index, f_name in enumerate(self.root_args)
+        }
+
+    def _allocate_varname(self) -> str:
+        """Return a fresh name "var_<n>" and advance the counter."""
+        varname = f"var_{self.var_count}"
+        self.var_count += 1
+        return varname
+
+    @classmethod
+    def emit_weights(cls, weights: List[WeightTensor]) -> List[dict]:
+        """Describe each weight (C name, shape, dump file name) as a dict."""
+        ret = []
+        for weight in weights:
+            name = cls.make_c_identifier(weight.name)
+            file_path = f"{weight.new_name}_path.bin"
+            ret.append({"name": name, "shape": weight.shape, "filename": file_path})
+        return ret
+
+    @staticmethod
+    def make_c_identifier(name: str) -> str:
+        """Turn `name` into a valid C identifier ("conv1/w:0" -> "conv1_w_0")."""
+        import re
+        name = re.sub(r"[^0-9A-Za-z_]", "_", name)
+        return "_" + name if not name or name[0].isdigit() else name
+
+
+class HpvmCodeGen(CodeGen):
+    """Generates HPVM-C source ("hpvm_src.cc") from template_hpvm.cpp."""
+
+    def _emit_hpvm_node_edges(self, input_vars: List[DFGNode]) -> List[dict]:
+        """Describe where each input (by position) of one node comes from."""
+        ret = []
+        for edge_idx, node in enumerate(input_vars):
+            hpvm_var_name, is_root_input = self.variables[node]
+            if is_root_input:
+                # Root arguments are identified by their index, not a name.
+                assert isinstance(hpvm_var_name, int)
+                edge = {"is_bindin": True, "input_idx": hpvm_var_name}
+            else:
+                edge = {"is_bindin": False, "input_node": hpvm_var_name}
+            ret.append({**edge, "edge_idx": edge_idx})
+        return ret
+
+    def emit_hpvm_node_structures(self) -> List[dict]:
+        """Allocate a variable per emitted node and return template records."""
+        node_envs = []
+        for node in self.dfg.traverse_order:
+            if isinstance(node, TensorNode):
+                continue
+            inputs = self.dfg.node_args(node)
+            func_name, extra_args = node.hpvm_codegen()
+            if func_name == "":  # No code generation
+                # Node must have single input, we equate the output to
+                # the input and skip code generation.
+                assert len(inputs) == 1
+                self.variables[node] = self.variables[inputs[0]]
+                continue
+            varname = self._allocate_varname()
+            self.variables[node] = varname, False  # not root-node arg
+            node_envs.append(
+                {
+                    "name": varname,
+                    "input_size": len(inputs),
+                    "edges": self._emit_hpvm_node_edges(inputs),
+                    "call_name": func_name,
+                    "call_args": extra_args,
+                }
+            )
+        return node_envs
+
+    def emit_root_io(self) -> Tuple[List[str], str]:
+        """Return (C names of the root arguments, name of the output value)."""
+        # Take names from root_args rather than self.variables: a node without
+        # codegen aliases its input's entry there, so one fed by a root tensor
+        # would otherwise show up as a bogus extra root argument.
+        input_args = [self.make_c_identifier(n.name) for n in self.root_args]
+        output_arg = self.variables[self.dfg.output][0]
+        return input_args, output_arg
+
+    def compile(self) -> None:
+        """Render the template and write it to <output_dir>/hpvm_src.cc."""
+        nodes = self.emit_hpvm_node_structures()
+        inputs, output = self.emit_root_io()
+        weights = self.emit_weights(self.weights)
+        # Render before opening so a template error leaves no truncated file.
+        source = template.render(
+            nodes=nodes,
+            input_name=self.input_name,
+            input_size=self.input_size,
+            batch_size=self.batch_size,
+            input_shape=self.input_shape,
+            root_inputs=inputs,
+            root_output=output,
+            weights=weights,
+            prefix=self.prefix or self.output_dir,
+        )
+        with open(self.output_dir / "hpvm_src.cc", "w") as f:
+            f.write(source)
diff --git a/hpvm/projects/onnx/frontend/codegen_tensor.py b/hpvm/projects/onnx/frontend/codegen_tensor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0a73106054c41f1bbfcf6692a6e8398fd4de3b4
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/codegen_tensor.py
@@ -0,0 +1,65 @@
+from pathlib import Path
+from typing import Dict, List
+
+import jinja2
+
+from codegen_hpvm import CodeGen
+from graph_ir import DFGNode, TensorNode
+
+TEMPLATE_FILE = "template_tensor.cpp"
+loader = jinja2.FileSystemLoader(searchpath=Path(__file__).parent)
+template_env = jinja2.Environment(loader=loader, trim_blocks=True)
+template = template_env.get_template(TEMPLATE_FILE)
+
+
+class TensorCodeGen(CodeGen):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.variables: Dict[DFGNode, str] = {
+            n: self.make_c_identifier(n.name) for n in self.root_args
+        }
+
+    ################################################
+    # CodeGen functions
+    ################################################
+
+    def emit_graph(self) -> List[dict]:
+        graph_code = []
+        for node in self.dfg.traverse_order:
+            if isinstance(node, TensorNode):
+                continue
+            inputs = self.dfg.node_args(node)
+            func_name, extra_args = node.codegen()
+            if func_name == "":  # No code generation
+                # Node must have single input, we equate the output to
+                # the input and skip code generation.
+                assert len(inputs) == 1
+                self.variables[node] = self.variables[inputs[0]]
+                continue
+            varname = self._allocate_varname()
+            self.variables[node] = varname
+            input_args = [self.variables[n] for n in inputs] + extra_args
+            graph_code.append(
+                {"output": varname, "inputs": input_args, "function": func_name}
+            )
+        return graph_code
+
+    ################################################
+    # Compile is a top level function to compile an onnx model into C/C++
+    # program with HPVM Tensor Runtime
+    ################################################
+
+    def compile(self):
+        graph_code = self.emit_graph()
+        output_arg = self.variables[self.dfg.output]
+        with open(self.output_dir / "src.cc", "w") as f:
+            f.write(
+                template.render(
+                    input=self.input_name,
+                    input_shape=self.input_shape,
+                    output=output_arg,
+                    graph_code=graph_code,
+                    weights=self.emit_weights(self.weights),
+                    output_dir=self.output_dir,
+                )
+            )
diff --git a/hpvm/projects/onnx/frontend/graph_builder.py b/hpvm/projects/onnx/frontend/graph_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..3996b947d2e51b0eea515bdacf2ed1bb625e35ad
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/graph_builder.py
@@ -0,0 +1,349 @@
+from collections import defaultdict
+from os import PathLike
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple, Union
+
+import networkx as nx
+import onnx
+
+import graph_ir as g
+from onnx_attr import node_attr_to_dict
+
+GraphT = onnx.GraphProto
+NodeT = onnx.NodeProto
+NodeT.__hash__ = lambda self: id(self)
+NodeT.__repr__ = NodeT.__str__ = lambda self: self.name
+
+
+class MarkedSubGraph:
+    """A subgraph with information on how it should replace a node in a super graph.
+
+    subgraph: a nx.DiGraph subgraph
+    entry_edges: (from, to, index) triples; `to` must be a node of self.subgraph
+    exit: the exit node of the subgraph.
+        When this subgraph replaces a node `n`, self.exit will be connected to
+        whatever `n` is connected to.
+    """
+
+    def __init__(self, subgraph: nx.DiGraph, entry_edges, exit) -> None:
+        assert all(to in subgraph for _, to, _ in entry_edges)
+        assert exit in subgraph
+        self.subgraph, self.exit = subgraph, exit
+        self.entry_edges = [(f, t, {"index": i}) for f, t, i in entry_edges]
+
+    @classmethod
+    def idiomatic_1to2(cls, node1, node2, predecessors):
+        """Create an idiomatic replacement as follows:
+
+        node(arg1, arg2, arg3) -> node2(node1(arg1, arg2), arg3)"""
+        p0, p1, p2 = predecessors
+        graph = nx.DiGraph()
+        graph.add_edge(node1, node2, index=0)
+        return cls(graph, [(p0, node1, 0), (p1, node1, 1), (p2, node2, 1)], node2)
+
+
+EmitNodeT = Union[MarkedSubGraph, g.DFGNode]
+
+
+class DFG(object):
+    """ONNX model translated into DFG with `DFGNode`s.
+
+    This class has a DFG, input/output information, and a clear traverse order
+    (think dominant tree), and is easier for CodeGen classes to work with."""
+
+    def __init__(self, graph: GraphT):
+        self._check_model(graph)
+        self._var_count = 0
+        # Build explicit DFG with ONNX nodes
+        onnx_graph = self._build_onnx_dfg(graph)
+        # Convert ONNX dfg into DFGNode DFG
+        self.graph = self._build_dfg(onnx_graph)
+        # Find out input nodes and output node (unique)
+        # removing dead nodes along the way if any
+        self.inputs, self.output = self._dce_get_io_info()
+
+    ################ Interfaces:
+
+    @property
+    def traverse_order(self) -> List[g.DFGNode]:
+        """Get topological order of computational graph by use-def relation."""
+        return list(nx.topological_sort(self.graph))
+
+    def node_args(self, node: g.DFGNode):
+        """Get input arguments of node."""
+        sorted_edges = sorted(self.graph.in_edges(node, "index"), key=lambda p: p[2])
+        return [e[0] for e in sorted_edges]
+
+    def dump_weights(self, output_dir: PathLike) -> None:
+        """Dump `WeightTensor`s into output_dir."""
+        output_dir = Path(output_dir)
+        for node in self.graph.nodes:
+            if not isinstance(node, g.WeightTensor):
+                continue
+            node.dump_weight(output_dir / (node.new_name + "_path.bin"))
+
+    ################ Internal methods (high-level):
+
+    @staticmethod
+    def _check_model(onnx_graph: GraphT):
+        """Check model validity and that every node and the graph have one output."""
+
+        import warnings
+        from onnx import checker, onnx_cpp2py_export
+
+        # Run onnx's own checker first; a validation error is only warned about.
+        try:
+            checker.check_graph(onnx_graph)
+        except onnx_cpp2py_export.checker.ValidationError as e:
+            warnings.warn(str(e))
+        if any(len(n.output) > 1 for n in onnx_graph.node):
+            raise ValueError("All node must have single output")
+        if len(onnx_graph.output) > 1:
+            raise ValueError("Graph must have single output")
+
+    @staticmethod
+    def _build_onnx_dfg(graph: GraphT) -> nx.DiGraph:
+        """Creates a DiGraph (by use-def relation) of onnx nodes from onnx GraphProto.
+        DiGraph is easier to use as a graph compared to GraphProto where use-def is implicit."""
+
+        ret_graph = nx.DiGraph()
+        onnx_defs, onnx_uses = def_use(graph.node)
+        tensors = extract_tensors_from_graph(graph)
+        ret_graph.add_nodes_from(graph.node)
+        for onnx_value_name, use_nodes in onnx_uses.items():
+            def_node = onnx_defs.get(onnx_value_name)
+            if def_node is None:
+                def_node = tensors[onnx_value_name]
+            for use_node, used_at_narg in use_nodes:
+                ret_graph.add_edge(def_node, use_node, index=used_at_narg)
+        return ret_graph
+
+    def _build_dfg(self, onnx_graph: nx.DiGraph) -> nx.DiGraph:
+        """Translate _build_onnx_dfg output into DFGNode DFG.
+        
+        First run some passes to process subgraphs that needs to be
+        processed together, then each unprocessed node is generated into
+        1 or more nodes."""
+
+        # Remove subgraphs that can be a single Flatten instead
+        onnx_graph = detect_flatten(onnx_graph)
+        # Remove subgraphs that look like padding but does nothing
+        onnx_graph = remove_no_padding(onnx_graph)
+        # For each onnx node, generate our nodes
+        node_to_nodes, error_nodes = {}, []
+        for onnx_node in nx.topological_sort(onnx_graph):
+            our_nodes = self._emit_node(onnx_graph, onnx_node)
+            if our_nodes is None:
+                error_nodes.append(onnx_node)
+            else:
+                node_to_nodes[onnx_node] = our_nodes
+        if error_nodes:
+            error_repr = [f"{n.name}({n.op_type})" for n in error_nodes]
+            if len(error_nodes) > 10:  # Magic number
+                raise ValueError(f"Unsupported operators (first 10): {error_repr[:10]}")
+            else:
+                raise ValueError(f"Unsupported operators: {error_repr}")
+        # Apply node_to_nodes replacement on onnx_graph to create a new DFG
+        return build_graph_with_mapping(onnx_graph, node_to_nodes)
+
+    def _dce_get_io_info(self):
+        inputs = [n for n in self.graph if isinstance(n, g.InputTensor)]
+        inputs_set = set(inputs)
+        reachables = set()
+        for component in nx.connected_components(self.graph.to_undirected()):
+            # If any inputs goes into this subgraph, it's alive.
+            if set(component).intersection(inputs_set):
+                reachables.update(component)
+        unreachables = set(self.graph) - reachables
+        # Remove nodes unreachable from input
+        self.graph.remove_nodes_from(unreachables)
+        # Then outputs are nodes with out_degree = 0
+        outputs = [n for n in self.graph if self.graph.out_degree[n] == 0]
+        assert len(outputs) == 1
+        return inputs, outputs[0]
+
+    @staticmethod
+    def _emit_node(in_graph: nx.DiGraph, node: NodeT) -> Optional[EmitNodeT]:
+        predec = sorted_inputs(in_graph, node)
+        if isinstance(node, g.DFGNode):
+            # Directly add node into return graph.
+            return node
+
+        attrs = node_attr_to_dict(node)
+        if node.op_type == "Conv":
+            weight_tensor = predec[1]
+            assert isinstance(weight_tensor, g.WeightTensor)
+            if len(weight_tensor.shape) != 4:
+                return None  # Only supports 2D conv
+            conv_node = g.Conv2DNode(node.name, attrs)
+            if len(predec) == 2:
+                return conv_node
+            # Split into conv followed by an addition
+            bias_node = g.BiasAddNode(f"Bias_{node.name.split('_')[-1]}")
+            return MarkedSubGraph.idiomatic_1to2(conv_node, bias_node, predec)
+        elif node.op_type in ("MatMul", "Gemm"):
+            mul_node = g.MatMulNode(node.name, attrs)
+            if node.op_type == "Gemm":
+                mul_node.gemm_transpose(node, predec)
+            if len(predec) == 2:
+                return mul_node
+            # Split into mul followed by an addition
+            bias_node = g.BiasAddNode(f"Bias_{node.name.split('_')[-1]}")
+            return MarkedSubGraph.idiomatic_1to2(mul_node, bias_node, predec)
+        one_to_one_nodes = {
+            "MaxPool": g.MaxPool2DNode,
+            "AveragePool": g.AveragePool2DNode,
+            "Add": g.AddNode,
+            "Softmax": g.SoftMaxNode,
+            "Relu": g.ReluNode,
+            "Tanh": g.TanhNode,
+            "BatchNormalization": g.BatchNormalizationNode,
+            "Pad": g.PadNode,
+            "Identity": g.IdentityNode,
+            "Flatten": g.FlattenNode,
+        }
+        if node.op_type in one_to_one_nodes:
+            return one_to_one_nodes[node.op_type](node.name, attrs)
+        return None
+
+
+def def_use(nodes: Iterable) -> Tuple[dict, dict]:
+    """Computes def/use relation from a list of node.
+
+    This method is duck-typed and operates on any node defining .input and .output.
+    """
+    defs, uses = {}, defaultdict(list)
+    for n in nodes:
+        for i, input_ in enumerate(n.input):
+            uses[input_].append((n, i))
+        for output in n.output:
+            defs[output] = n
+    return defs, uses
+
+
+def remove_no_padding(graph: nx.DiGraph) -> nx.DiGraph:
+    """Remove subgraphs that look like padding but does nothing."""
+    for node in list(graph.nodes):
+        if node.op_type != "Pad":
+            continue
+        input_args = sorted_inputs(graph, node)
+        # Find the second input argument to Pad (will be a Constant node)
+        # and take that away as well.
+        nct = input_args[1]
+        padding = node_attr_to_dict(nct)["value"]
+        if any(p != 0 for p in padding):
+            continue
+        # Connect input of Pad to where output of Pad goes
+        succ = graph.out_edges(node, "index")
+        for _, to, index in succ:
+            graph.add_edge(input_args[0], to, index=index)
+        # Remove nodes
+        graph.remove_nodes_from([node, nct])
+    return graph
+
+
+def detect_flatten(graph: nx.DiGraph) -> nx.DiGraph:
+    """Look for a shape-gather-unsqueeze-concat-reshape chain and replace that with flatten."""
+
+    for node in list(graph.nodes):
+        if node.op_type != "Shape":
+            continue
+        ng = get_next_in_chain(graph, "Gather", node)
+        nu = get_next_in_chain(graph, "Unsqueeze", ng)
+        nc = get_next_in_chain(graph, "Concat", nu)
+        nr = get_next_in_chain(graph, "Reshape", nc)
+        if nr is None:
+            continue
+        # Only now is `ng` known to exist: take the second input argument to
+        # Gather (will be a Constant node) away as well.
+        nct = sorted_inputs(graph, ng)[1]
+        suffix = node.name.split("_")[-1]
+        gen_node = g.FlattenNode(f"Flatten_{suffix}")
+        replace_chain_with_node_(graph, [node, ng, nct, nu, nc, nr], gen_node)
+    return graph
+
+
+def get_next_in_chain(
+    graph: nx.DiGraph, type_: str, node: Optional[NodeT]
+) -> Optional[NodeT]:
+    """
+    Get a unique user node of the unique output of Node `node`,
+    and return it if it has Type `type_`.
+    """
+    if node is None or len(node.output) != 1:
+        return None  # Propagates None; Unique output
+    users = list(graph.neighbors(node))
+    if len(users) != 1 or users[0].op_type != type_:
+        return None  # Unique user of the output; Correct type
+    return users[0]
+
+
+def replace_chain_with_node_(graph: nx.DiGraph, chain: list, node) -> nx.DiGraph:
+    inputs = sorted_inputs(graph, chain[0])
+    succ = graph.out_edges(chain[-1], "index")
+    for i, n in enumerate(inputs):
+        graph.add_edge(n, node, index=i)
+    for _, to, index in succ:
+        graph.add_edge(node, to, index=index)
+    graph.remove_nodes_from(chain)
+    return graph
+
+
+def build_graph_with_mapping(
+    graph: nx.DiGraph, node_mapping: Dict[NodeT, EmitNodeT]
+) -> nx.DiGraph:
+    """Return a copy of `graph` with each node replaced per `node_mapping`."""
+    graph = graph.copy()
+    single_node, multi_node, exits = {}, {}, {}
+    for replace_node, by_node in node_mapping.items():
+        if isinstance(by_node, g.DFGNode):
+            single_node[replace_node] = by_node
+        else:
+            multi_node[replace_node] = by_node
+    # We do one-to-many replacements first
+    # because their predecessors are specified as onnx nodes.
+    for replace_node, subgraph in multi_node.items():
+        graph = nx.compose(graph, subgraph.subgraph)
+        # In edges: a predecessor already replaced (and removed) is reached
+        # through its subgraph's exit, else add_edges_from would resurrect it.
+        entry = [(exits.get(f, f), t, d) for f, t, d in subgraph.entry_edges]
+        graph.add_edges_from(entry)
+        # Out edges, then drop the old node
+        for _, to, index in graph.out_edges(replace_node, "index"):
+            graph.add_edge(subgraph.exit, to, index=index)
+        graph.remove_node(replace_node)
+        exits[replace_node] = subgraph.exit
+    # Then do all one-to-one replacements.
+    return nx.relabel_nodes(graph, single_node)
+
+
+def extract_tensors_from_graph(onnx_graph: GraphT) -> Dict[str, g.TensorNode]:
+    tensors = {}
+    # parse weight
+    weight_cnt = 0
+    for weight_tensor in onnx_graph.initializer:
+        tensors[weight_tensor.name] = g.WeightTensor(
+            weight_tensor, f"weight_{weight_cnt}"
+        )
+        weight_cnt += 1
+    # parse input
+    input_cnt = 0
+    for input_ in onnx_graph.input:
+        if input_.name in tensors:
+            continue
+        tensors[input_.name] = g.InputTensor(input_, f"input_{input_cnt}")
+        input_cnt += 1
+    return tensors
+
+
+def sorted_inputs(graph: nx.DiGraph, node):
+    sorted_edges = sorted(graph.in_edges(node, "index"), key=lambda p: p[2])
+    return [e[0] for e in sorted_edges]
+
+
+def draw_graph(graph: nx.DiGraph, output_to):
+    from networkx.drawing.nx_agraph import to_agraph
+
+    agraph = to_agraph(graph)
+    agraph.layout("dot")
+    agraph.draw(output_to)
diff --git a/hpvm/projects/onnx/frontend/graph_ir.py b/hpvm/projects/onnx/frontend/graph_ir.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd42617f600e8ec8e6c9012fbd3adfa56ada76ec
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/graph_ir.py
@@ -0,0 +1,306 @@
+import abc
+from os import PathLike
+from typing import List, Tuple
+
+import onnx
+
+from onnx_attr import node_attr_to_dict
+
+
+class DFGNode(abc.ABC):
+    """Abstract node that represents 1 instruction in HPVM.
+
+    op_type should be overridden in subclasses for readability.
+    """
+
+    op_type = ""
+
+    def __init__(self, name: str, attrs: dict = None):
+        self.name, self.attrs = name, ({} if attrs is None else attrs)  # fresh dict
+
+    def codegen(self) -> Tuple[str, list]:
+        return "", []
+
+    def hpvm_codegen(self) -> Tuple[str, list]:
+        return "", []
+
+    def __repr__(self) -> str:
+        return f"{self.op_type}({self.name})"
+
+
+class TensorNode(DFGNode, abc.ABC):
+    """An abstract node for a value that exists without an instruction.
+
+    This is akin to Value class in LLVM, but in a different place on the
+    inheritance tree."""
+
+    def __init__(self, proto: onnx.TensorProto, new_name: str):
+        if not proto.name.strip():
+            raise ValueError("Tensor's name is required.")
+        super().__init__(proto.name, {})
+        self.new_name = new_name
+
+    def __str__(self):
+        return f"{self.__class__.__name__}: {self.name}"
+
+    __repr__ = __str__
+
+
+class InputTensor(TensorNode):
+    """Input to the computation graph.
+    
+    This is basically only used for its information about the ONNX input,
+    itself doesn't emit instruction or any interesting thing.
+    """
+
+    op_type = "InputTensor"
+
+    def __init__(self, input_proto: onnx.TensorProto, new_name: str):
+        super().__init__(input_proto, new_name)
+        # get type of input tensor
+        tensor_type = input_proto.type.tensor_type
+        # check if it has a shape:
+        shape = tensor_type.shape
+        self.shape: List[int] = [d.dim_value for d in shape.dim]
+
+
+class WeightTensor(TensorNode):
+    """An initialized parameter in ONNX graph.
+    
+    This is any parameter that has a initializer value in the ONNX model
+    (as opposed to InputTensor, which doesn't have any value).
+    """
+
+    op_type = "WeightTensor"
+
+    def __init__(self, weight_proto: onnx.TensorProto, new_name: str):
+        from onnx import numpy_helper
+
+        super().__init__(weight_proto, new_name)
+        self.shape = []
+        self.input_data = numpy_helper.to_array(weight_proto)
+        sh = self.input_data.shape
+        if len(sh) == 1:
+            self.shape = [1, sh[0], 1, 1]
+        elif len(sh) == 2:
+            self.shape = [1, 1, sh[0], sh[1]]
+        elif len(sh) == 4:
+            self.shape = [sh[0], sh[1], sh[2], sh[3]]
+        else:
+            self.shape = [1] * 4
+
+    def dump_weight(self, file_name: PathLike):
+        self.input_data.tofile(file_name)
+
+    def transpose_(self):
+        if len(self.input_data.shape) != 2:
+            raise ValueError("Can only transpose 2D array")
+        self.input_data = self.input_data.T
+        self.shape[3], self.shape[2] = self.shape[2:]
+
+
+class Conv2DNode(DFGNode):
+    op_type = "Conv2D"
+
+    def __init__(self, name: str, attrs: dict):
+        super().__init__(name, attrs)
+        padding = self.attrs["pads"]
+        assert len(padding) == 4, "2D convolution must have 4 padding values"
+        if any(p != padding[0] for p in padding[1:]):
+            raise ValueError("Convolution with different padding is unsupported")
+        self.padding = padding[0]
+        self.strides = self.attrs["strides"]
+
+    def codegen(self):
+        return (
+            "tensorConvolution",
+            [self.padding, self.padding, self.strides[0], self.strides[1]],
+        )
+
+    def hpvm_codegen(self):
+        return (
+            "__visc__tensor_convolution",
+            [self.padding, self.padding, self.strides[0], self.strides[1]],
+        )
+
+
+class _Pool2DNode(DFGNode, abc.ABC):
+    """Common super class of Average pooling and Max pooling.
+
+    Subclasses pick the pooling kind through `pool_type`: "0" is max pooling
+    and "1" is average pooling. It is passed to tensorPooling as-is and
+    selects the HPVM intrinsic. Padding is always emitted as 0.
+    """
+
+    pool_type = "0"
+
+    def __init__(self, name: str, attrs: dict):
+        super().__init__(name, attrs)
+        self.strides = self.attrs["strides"]
+        self.pool_size = self.attrs["kernel_shape"]
+        self.padding = 0
+
+    def _window_args(self) -> list:
+        """Arguments shared by both backends: kernel size, padding, strides."""
+        return [*self.pool_size, self.padding, self.padding, *self.strides]
+
+    def codegen(self):
+        return "tensorPooling", [self.pool_type, *self._window_args()]
+
+    def hpvm_codegen(self):
+        # HPVM has one intrinsic per pooling kind; this used to emit pool_max
+        # for average pooling as well.
+        kind = "mean" if self.pool_type == "1" else "max"
+        return f"__visc__tensor_pool_{kind}", self._window_args()
+
+
+class MaxPool2DNode(_Pool2DNode):
+    pool_type = "0"
+    op_type = "MaxPool2D"
+
+
+class AveragePool2DNode(_Pool2DNode):
+    pool_type = "1"
+    op_type = "AveragePool2D"
+
+
+class BiasAddNode(DFGNode):
+    op_type = "BiasAdd"
+
+    def codegen(self):
+        return "tensorAdd", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_add", []
+
+
+class MatMulNode(DFGNode):
+    op_type = "MatMul"
+
+    def codegen(self):
+        return "tensorGemmGPU", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_mul", []
+
+    @staticmethod
+    def gemm_transpose(onnx_gemm_node, predec):
+        """Find and transpose weights of the onnx gemm node.
+        
+        This way we transpose the constant weight instead of exporting
+        a transpose node (which doesn't yet exist in HPVM).
+        """
+
+        def _transpose(weight):
+            if not isinstance(weight, WeightTensor):
+                raise ValueError(
+                    f"Cannot transpose non-const {weight} (transpose op needed)"
+                )
+            weight.transpose_()
+
+        # Some tensors may need transposing
+        attrs = node_attr_to_dict(onnx_gemm_node)
+        if attrs.get("transA", False):
+            _transpose(predec[0])
+        if attrs.get("transB", False):
+            _transpose(predec[1])
+
+
+class SoftMaxNode(DFGNode):
+    op_type = "SoftMax"
+
+    def codegen(self):
+        return "tensorSoftmax", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_softmax", []
+
+
+class AddNode(DFGNode):
+    op_type = "Add"
+
+    def codegen(self):
+        return "tensorAdd", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_add", []
+
+
+class ReluNode(DFGNode):
+    op_type = "ReLU"
+
+    def codegen(self):
+        return "tensorRelu", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_relu", []
+
+
+class TanhNode(DFGNode):
+    op_type = "Tanh"
+
+    def codegen(self):
+        return "tensorTanh", []
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_tanh", []
+
+
+class BatchNormalizationNode(DFGNode):
+    op_type = "BN"
+
+    def __init__(self, name: str, attrs: dict):
+        super().__init__(name, attrs)
+        self.epsilon = self.attrs["epsilon"]
+
+    def codegen(self):
+        return "tensorBatchNorm", [self.epsilon]
+
+    def hpvm_codegen(self):
+        return "__visc__tensor_batchnorm", [self.epsilon]
+
+
+class FlattenNode(DFGNode):
+    op_type = "Flatten"
+
+
+class ActivationNode(DFGNode):
+    """
+    Element wise operators that is for activation function
+    e.g. HardSigmoid, LeakyRelu, PRelu, Pow, Reciprocal,
+    Relu, Selu, Sigmoid, Softplus, Sqrt, ThresholdedRelu,
+    Abs, Ceil, Elu, Floor, Neg
+    """
+
+    pass
+
+
+class LogicalOpNode(DFGNode):
+    """
+    Element wise operators that is not for activation function.
+    In other words, they are logical comparison operators
+    e.g. And, Equal, Greater, GreaterOrEqual, Less, LessOrEqual,
+    Or, Xor
+    """
+
+    pass
+
+
+class ZeroPadding2DNode(DFGNode):
+    pass
+
+
+class DepthwiseConv2DNode(DFGNode):
+    pass
+
+
+class DenseNode(DFGNode):
+    pass
+
+
+class PadNode(DFGNode):
+    pass
+
+
+class IdentityNode(DFGNode):
+    pass
diff --git a/hpvm/projects/onnx/frontend/main.py b/hpvm/projects/onnx/frontend/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3037224ac6a44535eabc8cf9f8e96ecf48021c9
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/main.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+from typing import Optional
+
+import onnx
+
+
+def check_version(model, new_version):
+    """Return ``model`` expressed in opset ``new_version``, converting if needed.
+
+    The current opset is taken from the ``opset_import`` entry of the default
+    operator domain (``""`` or ``"ai.onnx"``). A model without such an entry
+    is treated as opset 1. When the versions already match, ``model`` itself
+    is returned.
+
+    Raises:
+        RuntimeError: if ``onnx.version_converter`` cannot convert the model;
+            the converter's error is chained as the cause.
+    """
+    opset = 1  # default opset version set to 1 if not specified
+    for opset_id in getattr(model, "opset_import", None) or ():
+        # Custom domains (e.g. "ai.onnx.ml") carry unrelated version numbers
+        # and may be listed first, so only the default domain is considered.
+        if getattr(opset_id, "domain", "") in ("", "ai.onnx"):
+            opset = getattr(opset_id, "version", 1)
+            break
+    if opset == new_version:
+        return model
+
+    # Imported lazily: the converter is only needed when versions differ.
+    from onnx import version_converter
+
+    try:
+        return version_converter.convert_version(model, new_version)
+    except RuntimeError as e:
+        raise RuntimeError(
+            f"Current version {opset} of ONNX model not supported!\n"
+            f"Conversion failed with message below: \n{e}"
+        ) from e
+
+
+def compile(
+    onnx_file: Path,
+    output_dir: Path,
+    input_size: int,
+    prefix: Optional[str],
+    batch_size: Optional[int],
+    opset: Optional[int],
+    hpvmc: bool,
+):
+    """Load an ONNX model, generate source code for it and dump its weights.
+
+    Args:
+        onnx_file: path of the ONNX model to load.
+        output_dir: folder handed to the code generator and to
+            ``DFG.dump_weights``.
+        input_size: forwarded to the selected code generator.
+        prefix: forwarded to ``HpvmCodeGen`` only (ignored in tensor mode).
+        batch_size: forwarded to ``HpvmCodeGen`` only (ignored in tensor mode).
+        opset: when not ``None``, the model is passed through
+            ``check_version`` with this opset first.
+        hpvmc: ``True`` selects ``HpvmCodeGen``, ``False`` ``TensorCodeGen``.
+    """
+    from graph_builder import DFG
+    from codegen_tensor import TensorCodeGen
+    from codegen_hpvm import HpvmCodeGen
+
+    model = onnx.load(onnx_file)
+    if opset is not None:
+        model = check_version(model, opset)
+    dfg = DFG(model.graph)
+
+    # Pick the generator first, then drive it through the shared interface.
+    if hpvmc:
+        generator = HpvmCodeGen(dfg, output_dir, input_size, batch_size, prefix)
+    else:
+        generator = TensorCodeGen(dfg, output_dir, input_size)
+    generator.compile()
+
+    dfg.dump_weights(output_dir)
+
+
+def parse_args():
+    """Parse the command line of the ONNX frontend.
+
+    Returns:
+        An ``argparse.Namespace`` whose attributes match the parameters of
+        ``compile``: ``onnx_file``, ``output_dir``, ``input_size``,
+        ``prefix``, ``batch_size``, ``opset`` and ``hpvmc``.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description="ONNX to HPVM-C")
+    parser.add_argument("onnx_file", type=Path, help="Path to input ONNX file")
+    parser.add_argument(
+        "output_dir",
+        type=Path,
+        help="Output folder where source file and weight files are generated",
+    )
+    parser.add_argument(
+        "input_size", type=int, help="Size of input dataset",
+    )
+    # NOTE(review): the help texts below promise defaults (output_dir, input
+    # size) that are not applied here; omitted options stay None. Confirm
+    # that the code generator fills them in.
+    parser.add_argument(
+        "-p",
+        "--prefix",
+        type=str,
+        help="Prefix in generated code; will be attached before name of weight/input files. "
+        "Defaults to output_dir.",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch-size",
+        type=int,
+        help="Batch size to be used in the generated code. "
+        "Defaults to input size (i.e., not using batch).",
+    )
+    parser.add_argument("--opset", type=int, help="ONNX opset version (enforced)")
+    parser.add_argument(
+        "-c",
+        "--compile-mode",
+        type=str,
+        choices=["tensor", "hpvmc"],
+        default="hpvmc",
+        help="""Output mode.
+tensor: HPVM Tensor Runtime;
+hpvmc: HPVM C Interface. Default value is hpvmc.""",
+    )
+
+    args = parser.parse_args()
+    # compile() takes a boolean ``hpvmc`` instead of the mode string, so the
+    # attribute is swapped before the namespace is splatted into compile().
+    args.hpvmc = args.compile_mode == "hpvmc"
+    del args.compile_mode
+    return args
+
+
+def main():
+    """Entry point: parse arguments, create the output folder, run ``compile``."""
+    from os import makedirs
+
+    cli_args = parse_args()
+    makedirs(cli_args.output_dir, exist_ok=True)
+    # The namespace attributes are named after compile()'s parameters.
+    options = vars(cli_args)
+    compile(**options)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hpvm/projects/onnx/frontend/onnx_attr.py b/hpvm/projects/onnx/frontend/onnx_attr.py
new file mode 100644
index 0000000000000000000000000000000000000000..7434dccbbb9f579af7e3656ad73560332c88bc74
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/onnx_attr.py
@@ -0,0 +1,83 @@
+from typing import Tuple
+
+import numpy as np
+from onnx import AttributeProto, NodeProto, TensorProto
+
+
+def throw_ctor(ty):
+    """Return a constructor stand-in that rejects every value.
+
+    The returned callable raises ``ValueError`` naming ``ty`` and the value it
+    was given; it never returns.
+    """
+
+    def _throw_ctor(x):
+        message = f"Cannot construct type {ty} from value {x}"
+        raise ValueError(message)
+
+    return _throw_ctor
+
+
+def composite_ctor(singular_ctor):
+    """Lift ``singular_ctor`` to work element-wise, returning a list."""
+
+    def _composite_ctor(xs):
+        return list(map(singular_ctor, xs))
+
+    return _composite_ctor
+
+
+def tensor_ctor(x: TensorProto) -> np.ndarray:
+    """Convert an ONNX ``TensorProto`` into a freshly allocated NumPy array.
+
+    The bytes in ``x.raw_data`` are interpreted with the NumPy dtype that
+    corresponds to ``x.data_type`` and reshaped to ``x.dims``.
+
+    NOTE(review): only ``raw_data`` is read. A tensor whose payload lives in
+    another field of the proto would not be converted correctly by this
+    function; confirm that all inputs use ``raw_data``.
+
+    Raises:
+        ValueError: if the tensor's data type has no NumPy mapping here.
+    """
+    # ``np.bool_`` / ``np.str_`` are used instead of the ``np.bool`` /
+    # ``np.str`` aliases: the aliases were deprecated in NumPy 1.20 and
+    # removed in 1.24, while the underscore names denote the same dtypes on
+    # every NumPy version.
+    tensor_typenames_to_numpy_ty = {
+        "BOOL": np.bool_,
+        "UINT8": np.uint8,
+        "INT8": np.int8,
+        "UINT16": np.uint16,
+        "INT16": np.int16,
+        "INT32": np.int32,
+        "UINT32": np.uint32,
+        "INT64": np.int64,
+        "UINT64": np.uint64,
+        "STRING": np.str_,
+        "FLOAT16": np.float16,
+        "FLOAT": np.float32,
+        "DOUBLE": np.float64,
+        # 'BFLOAT16' -- unsupported
+        "COMPLEX64": np.complex64,
+        "COMPLEX128": np.complex128,
+    }
+
+    tensor_typename = TensorProto.DataType.Name(x.data_type)
+    if tensor_typename not in tensor_typenames_to_numpy_ty:
+        raise ValueError(f"Tensor with type {tensor_typename} cannot be processed")
+    numpy_dtype = tensor_typenames_to_numpy_ty[tensor_typename]
+    # frombuffer() returns a read-only view of the protobuf bytes; copy() makes
+    # the result an independent, writable array.
+    return np.frombuffer(x.raw_data, dtype=numpy_dtype).reshape(x.dims).copy()
+
+
+def parse_node_attr(onnx_attr: AttributeProto) -> object:
+    """Convert one ONNX attribute into a plain Python value.
+
+    The attribute's type selects both the field that holds the payload and
+    the constructor applied to it: ``float`` / ``int`` / ``str`` for scalars,
+    ``np.ndarray`` for tensors, and lists of those for the plural types.
+
+    Raises:
+        ValueError: for graph and sparse-tensor attributes (not supported),
+            and for attribute types missing from the table below.
+    """
+    from collections import namedtuple
+
+    def text_ctor(raw):
+        # protobuf exposes string attributes as ``bytes``; ``str(b"x")`` would
+        # produce the literal "b'x'" rather than the text itself.
+        return raw.decode("utf-8") if isinstance(raw, bytes) else str(raw)
+
+    AttrMeta = namedtuple("AttrMeta", ["ctor", "data_name"])
+    attr_typenames_to_meta = {
+        "FLOAT": AttrMeta(float, "f"),
+        "INT": AttrMeta(int, "i"),
+        "STRING": AttrMeta(text_ctor, "s"),
+        "TENSOR": AttrMeta(tensor_ctor, "t"),
+        "GRAPH": AttrMeta(throw_ctor("GRAPH"), "g"),
+        "SPARSE_TENSOR": AttrMeta(throw_ctor("SPARSE_TENSOR"), "sparse_tensor"),
+        "FLOATS": AttrMeta(composite_ctor(float), "floats"),
+        "INTS": AttrMeta(composite_ctor(int), "ints"),
+        "STRINGS": AttrMeta(composite_ctor(text_ctor), "strings"),
+        "TENSORS": AttrMeta(composite_ctor(tensor_ctor), "tensors"),
+        "GRAPHS": AttrMeta(throw_ctor("GRAPHS"), "graphs"),
+        "SPARSE_TENSORS": AttrMeta(throw_ctor("SPARSE_TENSORS"), "sparse_tensors"),
+    }
+
+    typename = AttributeProto.AttributeType.Name(onnx_attr.type)
+    # Raised explicitly (not asserted) so the check survives ``python -O``.
+    if typename not in attr_typenames_to_meta:
+        raise ValueError(f"ONNX attribute contains non-ONNX type {typename}")
+    attr_meta = attr_typenames_to_meta[typename]
+    data = getattr(onnx_attr, attr_meta.data_name)
+    return attr_meta.ctor(data)
+
+
+def node_attr_to_dict(onnx_node: NodeProto):
+    """Map each attribute name of ``onnx_node`` to its parsed Python value."""
+    parsed = {}
+    for attr in onnx_node.attribute:
+        parsed[attr.name] = parse_node_attr(attr)
+    return parsed
diff --git a/hpvm/projects/onnx/frontend/template_hpvm.cpp b/hpvm/projects/onnx/frontend/template_hpvm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e4d13c84ece354d561b141693a3b4ec40a68974a
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/template_hpvm.cpp
@@ -0,0 +1,98 @@
+#include <string>
+#include <visc.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+{# One function per entry of nodes, named after the node with a _node suffix. #}
+{# Each of the node.input_size inputs becomes a pointer plus byte-count parameter pair. #}
+{% for node in nodes %}
+void {{node.name}}_node(
+{%- for n in range(node.input_size) -%}
+void *t{{n}}, size_t bytes_t{{n}}{{", " if not loop.last}}
+{%- endfor %}) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes({{node.input_size}}, {% for n in range(node.input_size) -%}
+t{{n}}{{", " if not loop.last}}
+{%- endfor %}, 0);
+{# The call receives every tensor pointer, then node.call_args when that list is non-empty. #}
+  void *r = {{node.call_name}}({% for n in range(node.input_size) -%}
+t{{n}}{{", " if not loop.last}}
+{%- endfor %}{{", " if node.call_args}}{{node.call_args|join(", ")}});
+{# Two values are returned: the result pointer and a byte count that is always 0. #}
+  __visc__return(2, r, (size_t) 0);
+}
+
+{% endfor -%}
+
+{# Root function: parameter 0 is __output, then one pointer plus byte-count pair per entry of root_inputs. #}
+void root(void *__output, {%- for n in root_inputs -%}
+void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
+{%- endfor %}) {
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes({{root_inputs|length}}, {% for n in root_inputs -%}
+{{n}}{{", " if not loop.last}}
+{%- endfor %}, 0);
+
+{# For every node: create it, then wire each of its edges. #}
+{% for node in nodes %}
+  void* {{node.name}} = __visc__createNodeND(0, {{node.name}}_node);
+{% for edge in node.edges %}
+{% if edge.is_bindin %}
+{# Source slots are input_idx * 2 + 1 and + 2; the offset of 1 presumably skips __output (TODO confirm). #}
+{# Destination slots edge_idx * 2 and + 1 address the pointer and byte-count pair of the node function. #}
+  __visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 1}}, {{edge.edge_idx * 2}}, 0);
+  __visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 2}}, {{edge.edge_idx * 2 + 1}}, 0);
+{% else %}
+{# Producer outputs 0 and 1 (the two values each node function returns) feed the same destination pair. #}
+  __visc__edge({{edge.input_node}}, {{node.name}}, 1, 0, {{edge.edge_idx * 2}}, 0);
+  __visc__edge({{edge.input_node}}, {{node.name}}, 1, 1, {{edge.edge_idx * 2 + 1}}, 0);
+{% endif %}
+{% endfor %}
+
+{% endfor %}
+{# Both outputs of root_output are bound as the outputs of root. #}
+  __visc__bindOut({{root_output}}, 0, 0, 0);
+  __visc__bindOut({{root_output}}, 1, 1, 0);
+}
+
+{# ret_t holds a tensor pointer and a byte count, the same two values every node function returns. #}
+struct ret_t {
+  void* tensor;
+  size_t bytes;
+};
+
+{# RootIn lists its fields in the order of root's parameters (__output, then a pointer and byte count per root input), followed by a ret_t. #}
+{# The struct is packed, so no padding is inserted between the fields. #}
+typedef struct __attribute__((__packed__)) {
+  void *__output;
+{% for n in root_inputs %}
+  void *{{n}};
+  size_t {{n}}_bytes;
+{% endfor %}
+  struct ret_t r;
+} RootIn;
+
+
+{# batch_count uses integer division, so a trailing partial batch is not processed. #}
+const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
+
+{# Generated entry point: load weights, fill the root arguments, then run and score the graph batch by batch. #}
+int main(){
+  std::string dir_prefix = "{{prefix}}/";
+  std::string input_path = dir_prefix + "input.bin";
+  std::string labels_path = dir_prefix + "labels32.bin";
+{# One path variable and one readTrainedWeights call per entry of weights. #}
+{% for w in weights %}
+  std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
+  void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
+{% endfor %}
+
+{# NOTE(review): args is allocated with malloc and never freed here, and args->__output is not assigned before launch. Confirm both are intended. #}
+  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
+  void* {{input_name}} = create4DTensor(0, nchw, batch_size, {{input_shape|join(', ')}});
+{# Every root input pointer is stored in args with a byte count of 0. #}
+{% for n in root_inputs %}
+  args->{{n}} = {{n}};
+  args->{{n}}_bytes = 0;
+{% endfor %}
+
+  __visc__init();
+  startMemTracking();
+  for (int i = 0; i < batch_count; i++){
+    int start = i * batch_size, end = start + batch_size;
+{# The input tensor created above is refilled for each batch, so args keeps pointing at the same tensor. #}
+    copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
+
+    void* dfg = __visc__launch(0, root, (void*) args);
+    __visc__wait(dfg);
+    void *result = static_cast<RootIn*>(args)->__output;
+    hpvm_request_tensor(result, 0);
+
+{# NOTE(review): labels is not released explicitly in this loop; presumably freeBatchMemory covers it (TODO confirm). #}
+    uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end);
+    computeAccuracy3(labels, result);
+    freeBatchMemory();
+  }
+  __visc__cleanup();
+  return 0;
+}
diff --git a/hpvm/projects/onnx/frontend/template_tensor.cpp b/hpvm/projects/onnx/frontend/template_tensor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae3060836451069ed6aee9321d714db1b66c1723
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/template_tensor.cpp
@@ -0,0 +1,50 @@
+#include <chrono>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../../tensor_runtime/include/tensor_runtime.h"
+#include "../include/utils.h"
+
+{# Generated entry point for the tensor-runtime mode: load weights, run the graph per batch, report the mean accuracy. #}
+int main() {
+{# The trailing slash keeps the file names below from being glued onto the directory name. #}
+  std::string dir_prefix = "{{output_dir}}/";
+  std::string input_path = dir_prefix + "input.bin";
+  std::string labels_path = dir_prefix + "labels.bin";
+{# One path variable and one readTrainedWeights call per entry of weights. #}
+{% for w in weights %}
+  std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
+  void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
+{% endfor %}
+
+  llvm_hpvm_initTensorRt(0);
+  startMemTracking();
+
+{# Both values come from input_shape[0], so batch_count below is always 1. #}
+  int test_input_size = {{input_shape[0]}};
+  int batch_size = {{input_shape[0]}};
+  // # FIXME: Ceiling for batch_count
+  int batch_count = test_input_size / batch_size;
+  float final_accuracy = 0.0;
+
+  for (int i = 0; i < batch_count; i++) {
+    int start = i * batch_size;
+    int end = (i + 1) * batch_size;
+    void *{{input}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
+
+{# One statement per entry of graph_code: the output variable receives the result of the function applied to the inputs. #}
+{% for code in graph_code %}
+    auto {{code.output}} = {{code.function}}({{code.inputs|join(', ')}});
+{% endfor %}
+    
+    uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end);
+    float accuracy = computeAccuracy3(labels, {{output}});
+    final_accuracy += accuracy;
+    freeBatchMemory();
+  }
+
+{# The per-batch accuracies are averaged over all batches. #}
+  final_accuracy = final_accuracy / batch_count;
+  dumpFinalAccuracy(final_accuracy);
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
\ No newline at end of file
diff --git a/hpvm/projects/onnx/models/alexnet/alexnet.onnx b/hpvm/projects/onnx/models/alexnet/alexnet.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d87fa5fe75eddf8ae2823bd0ace120bbacd50cdf
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/alexnet.onnx differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_0.npz b/hpvm/projects/onnx/models/alexnet/test_data_0.npz
new file mode 100644
index 0000000000000000000000000000000000000000..0089f591dc76a98182e0384c70874fca701d3469
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_0.npz differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_1.npz b/hpvm/projects/onnx/models/alexnet/test_data_1.npz
new file mode 100644
index 0000000000000000000000000000000000000000..30b9d311981525cff5ef71ad588a763eb403c1b4
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_1.npz differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_2.npz b/hpvm/projects/onnx/models/alexnet/test_data_2.npz
new file mode 100644
index 0000000000000000000000000000000000000000..f73145dc9154aacf13455de9a865889d54efe263
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_2.npz differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0dbdb3ecde433c70d5201cce0343b396261ca9e0
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c9a100142c98c684ce9358966c6ae4b99e62521d
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..968e6125f1ab45d6d3a1c2067309f05a2aab689a
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c841190b7499f8eded7a484e691254a5aafbc4db
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..189e616b49d2fa21fae9a32425e24cd263ca0203
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ec17185491fa73d344cb46415185422bc7f416bc
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3d91313b14fb444c268f57e8bb05f8c38421107d
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..49e4e4f83c8384886ceb08283a6b46d2430ac1b9
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..72d611c195d20032b4953562f4722b4e1ddedff8
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..dfe5b65c8260d684171cc66e3759cb66e02a1ed7
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f737585597ce2b56253e37f75f3defbbfce30cec
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6374baf7869b08c98c3b43aebbe58e3d15e99774
Binary files /dev/null and b/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/keras/alexnet.onnx b/hpvm/projects/onnx/models/keras/alexnet.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..452d9f846fad21b5fd89321e86b8a7945c625d2d
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/alexnet2.onnx b/hpvm/projects/onnx/models/keras/alexnet2.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e1147a8e589243004411b2b4f958a2240f3df94d
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet2.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/alexnet8k.onnx b/hpvm/projects/onnx/models/keras/alexnet8k.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..49f74d5f2f4dfb74f266b174a5a72d5c7add0cc1
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet8k.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/alexnet_last.onnx b/hpvm/projects/onnx/models/keras/alexnet_last.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f9f82e3cbfd79df383afe3c61540fdfded45a976
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/alexnet_last.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/lenet.onnx b/hpvm/projects/onnx/models/keras/lenet.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..659764f2f46d05dad62c2187c13c665839fe3058
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/lenet.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/mobilenet.onnx b/hpvm/projects/onnx/models/keras/mobilenet.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..947ab70daf9653af5f7d84349f92aca79d31c131
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/mobilenet.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx b/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..998d7f05b540b97e0fa36daab280aebbb3574eff
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/resnet.onnx b/hpvm/projects/onnx/models/keras/resnet.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b9ad6bc843e3167151e74ad1a720130ef8421b4e
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/resnet.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..2395a9770aaf0df87d8e2acdad0935a8058fed3b
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx differ
diff --git a/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a96ddf4afffca4528d27b4f4fd482aeddc8ae165
Binary files /dev/null and b/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx differ
diff --git a/hpvm/projects/onnx/models/mnist/mnist.onnx b/hpvm/projects/onnx/models/mnist/mnist.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..fc1a3f733c6e6243dd23dacb125b7a372de55a50
Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/mnist.onnx differ
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f0072d51a480af615e92312608f75993be9f1136
Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a6f4cdf92e27aaab21e44098b5b28e16048098e2
--- /dev/null
+++ b/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb
@@ -0,0 +1,2 @@
+
+J(ãêsDU®ÄŒtÍEÚ'DWQeÄYôÐÄQôÄ3vÂNKBÄñ³Ä
\ No newline at end of file
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b40ca9538b7b03111268892ca8d7c71a5e376d06
Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..786b006694a575f64fa212cb60d60d962fc042b0
--- /dev/null
+++ b/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb
@@ -0,0 +1,2 @@
+
+J(E_ÅÓ;Ã¹ÒÄXê“Äy›Ä„*_DHÔºÃ“!‘Ã9ZÞÂ
\ No newline at end of file
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d84a2064893ba68ad633391af74744dfc991a7cd
Binary files /dev/null and b/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cb22e7b4ff7c12b9ec73ab832e34c3a566a51077
--- /dev/null
+++ b/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb
@@ -0,0 +1,2 @@
+
+J(láÅ4‹Ä’†DMWÄ‘ÆDË¿Ä>á'ÅìaÂ¤&•B6hE
\ No newline at end of file
diff --git a/hpvm/projects/onnx/models/resnet50/resnet50.onnx b/hpvm/projects/onnx/models/resnet50/resnet50.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..09992ccb764246e182874eda716d535c83a82123
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/resnet50.onnx differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..846b581f7683fe007535309dd7bba5e356e524f6
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5c357e69d8f002e509525cbf8bbb470fa6bc575e
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0454c0cd3e58115a8805a1f770ea5406b5a72dbb
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..631cf2cad41b3d582b4d792ef480f8fcdec261df
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..665be39c8ee7136b9341f5ecdb370433d8afc910
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb differ
diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ac41dadf34359eb8ea9d185ad181138b32e09cec
Binary files /dev/null and b/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb differ
diff --git a/hpvm/projects/onnx/profile/alexnet_keras.py b/hpvm/projects/onnx/profile/alexnet_keras.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcbf35f0c632eb27d300a94fde5882761701c53b
--- /dev/null
+++ b/hpvm/projects/onnx/profile/alexnet_keras.py
@@ -0,0 +1,123 @@
+import numpy as np
+from keras.datasets import cifar10
+from keras.models import Sequential
+from keras.layers.core import Dense, Dropout, Flatten, Activation
+from keras.layers.convolutional import Conv2D
+from keras.optimizers import Adam
+from keras.layers.pooling import MaxPooling2D
+from keras.utils.np_utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras import backend as K
+from keras import regularizers
+from keras.callbacks import LearningRateScheduler
+import sys
+import struct
+import keras
+import numpy as np
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+def buildModel2():
+    """Build the AlexNet-style Keras model profiled by this script.
+
+    Five tanh convolutions ('same' padding, L2 weight decay 1e-4) with three
+    2x2 max-pooling stages and dropout, followed by Flatten, Dense(10) and a
+    softmax activation. The first layer expects input of shape (3, 32, 32).
+
+    Returns:
+        The uncompiled ``Sequential`` model.
+    """
+    activation_type = "tanh"
+    weight_decay = 1e-4
+
+    def conv(filters, size, **extra):
+        # All convolutions share activation, padding and regularizer.
+        return Conv2D(filters, kernel_size=(size, size), activation=activation_type,
+                      padding='same',
+                      kernel_regularizer=regularizers.l2(weight_decay), **extra)
+
+    def pool():
+        return MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
+
+    # Layers are created in the order in which they are stacked.
+    layers = [
+        conv(64, 11, input_shape=(3, 32, 32)),
+        pool(),
+        Dropout(0.2),
+        conv(192, 5),
+        pool(),
+        Dropout(0.3),
+        conv(384, 3),
+        conv(256, 3),
+        conv(256, 3),
+        pool(),
+        Dropout(0.4),
+        Flatten(),
+        Dense(10),
+        Activation('softmax'),
+    ]
+
+    model = Sequential()
+    for layer in layers:
+        model.add(layer)
+    return model
+
+def lr_schedule(epoch):
+    """Return the learning rate for ``epoch`` (step-wise decay).
+
+    The rate starts at 0.001 and drops once the epoch exceeds 20, 40, 60 and
+    80.
+    """
+    # (exclusive lower bound on the epoch, rate), highest bound first so the
+    # first match is the rate that applies.
+    steps = ((80, 0.00005), (60, 0.0001), (40, 0.0003), (20, 0.0005))
+    for threshold, lrate in steps:
+        if epoch > threshold:
+            return lrate
+    return 0.001
+
+def trainModel(model):
+    """Compile ``model``, fit it for one epoch and print its test scores.
+
+    Reads the module-level arrays ``X_train``, ``Y_train``, ``X_test`` and
+    ``Y_test``. Labels are one-hot encoded with 10 classes.
+
+    NOTE(review): the ImageDataGenerator below is fitted but never passed to
+    ``model.fit``, so training runs on the unaugmented arrays. Confirm
+    whether augmentation was intended.
+    """
+    optimizer = Adam(lr=0.0001, decay=1e-6)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    print (to_categorical(Y_train))
+
+    augmenter = ImageDataGenerator(
+        rotation_range=15,
+        width_shift_range=0.1,
+        height_shift_range=0.1,
+        horizontal_flip=True,
+    )
+    augmenter.fit(X_train)
+
+    train_targets = to_categorical(Y_train, 10)
+    validation_targets = to_categorical(Y_test, 10)
+    schedule = LearningRateScheduler(lr_schedule)
+    model.fit(X_train, train_targets,
+              batch_size=128,
+              shuffle=True,
+              epochs=1,
+              validation_data=(X_test, validation_targets),
+              callbacks=[schedule])
+
+    # Evaluate on the test split.
+    scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10))
+    loss, accuracy = scores[0], scores[1]
+
+    print('Loss: %.3f' % loss)
+    print('Accuracy: %.3f' % accuracy)
+
+    print ("*** TRAINED MODEL ****\n")
+    
+# Script body: load CIFAR-10, normalise it, train the model and time inference.
+# The image layout is set to channels-first before the data is loaded.
+K.set_image_data_format('channels_first')
+(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+test_labels = Y_test
+train_labels = Y_train
+
+#X_train = X_train.astype('float32')
+#X_test = X_test.astype('float32')
+# NOTE(review): with the casts above disabled, the dtype after this division
+# is whatever NumPy promotes to (presumably float64). Confirm that is wanted.
+X_train = X_train / 255.0
+X_test = X_test / 255.0
+
+# Statistics are reduced over axes 0-3 of the training split and applied to
+# both splits; 1e-7 guards against division by zero.
+mean = np.mean(X_train,axis=(0,1,2,3))
+std = np.std(X_train,axis=(0,1,2,3))   
+X_train = (X_train-mean)/(std+1e-7)
+X_test = (X_test-mean)/(std+1e-7)
+# trainModel() reads X_train / Y_train / X_test / Y_test as globals, so it must
+# run after the normalisation above.
+model = buildModel2()
+trainModel(model)
+import time
+# Timed region: only the prediction on the first 8000 test samples.
+start = time.time()
+keras_result = model.predict(X_test[:8000])
+print("time:", time.time() - start)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/profile/alexnet_onnx.py b/hpvm/projects/onnx/profile/alexnet_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bde4eaab39d901ef131f7cda59810721d701e1b
--- /dev/null
+++ b/hpvm/projects/onnx/profile/alexnet_onnx.py
@@ -0,0 +1,45 @@
+import numpy as np
+from keras.datasets import cifar10
+from keras import backend as K
+import numpy as np
+import os
+import keras2onnx
+import onnx
+import onnxruntime
+import time
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+# Script body: normalise CIFAR-10 like alexnet_keras.py, then time ONNX Runtime
+# inference on the exported model.
+K.set_image_data_format('channels_first')
+(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+test_labels = Y_test
+train_labels = Y_train
+
+#X_train = X_train.astype('float32')
+#X_test = X_test.astype('float32')
+X_train = X_train / 255.0
+X_test = X_test / 255.0
+
+# Statistics come from the training split and are applied to both splits.
+mean = np.mean(X_train,axis=(0,1,2,3))
+std = np.std(X_train,axis=(0,1,2,3))   
+X_train = (X_train-mean)/(std+1e-7)
+X_test = (X_test-mean)/(std+1e-7)
+
+# The model path is relative, so the script has to be started from the
+# directory that makes "../models/keras/alexnet.onnx" resolve.
+sess = onnxruntime.InferenceSession("../models/keras/alexnet.onnx")
+
+# Report name, shape and type of the first input and the first output.
+input_name = sess.get_inputs()[0].name
+print("Input name  :", input_name)
+input_shape = sess.get_inputs()[0].shape
+print("Input shape :", input_shape)
+input_type = sess.get_inputs()[0].type
+print("Input type  :", input_type)
+
+output_name = sess.get_outputs()[0].name
+print("Output name  :", output_name)  
+output_shape = sess.get_outputs()[0].shape
+print("Output shape :", output_shape)
+output_type = sess.get_outputs()[0].type
+print("Output type  :", output_type)
+
+# Timed region: the float32 conversion and the slice to 8000 samples are
+# evaluated inside the run() call, so they are part of the measurement.
+start_time = time.time()
+ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]})
+print("time: ", time.time() - start_time)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/profile/alexnet_tvm.py b/hpvm/projects/onnx/profile/alexnet_tvm.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a3847335f43e0d9af9a5a51fd1ba90582f22489
--- /dev/null
+++ b/hpvm/projects/onnx/profile/alexnet_tvm.py
@@ -0,0 +1,45 @@
+import numpy as np
+from keras.datasets import cifar10
+from keras import backend as K
+import sys
+import struct
+import numpy as np
+import os
+import tvm
+import tvm.relay as relay
+from tvm.contrib import graph_runtime
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+K.set_image_data_format('channels_last')
+(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+# Scale both splits by 1/255 so mean/std are on the same scale as X_test.
+X_train = X_train / 255.0
+X_test = X_test / 255.0
+mean = np.mean(X_train,axis=(0,1,2,3))
+std = np.std(X_train,axis=(0,1,2,3))
+X_test = (X_test-mean)/(std+1e-7)
+
+import onnx
+onnx_model = onnx.load("../models/keras/alexnet_last.onnx")
+
+input_name = 'conv2d_8_input'
+input_size = 8000
+shape_dict = {input_name: X_test[:input_size].shape}
+mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
+target = 'cuda -libs=cudnn,cublas'
+with relay.build_config(opt_level=3):
+    graph, lib, params = relay.build(mod, target, params=params)
+
+import time
+ctx = tvm.gpu()
+# create module
+module = graph_runtime.create(graph, lib, ctx)
+# set input and parameters
+module.set_input(input_name, X_test[:input_size].astype("float32"))
+module.set_input(**params)
+# run and get output (both inside the timed region)
+start_time = time.time()
+module.run()
+out_shape = (input_size, 10)
+out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()
+print("Time:", time.time() - start_time)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/profile/prof.nvvp b/hpvm/projects/onnx/profile/prof.nvvp
new file mode 100644
index 0000000000000000000000000000000000000000..be5cbd996b6980e9e4bad773fe9f3abea1ccb0f6
Binary files /dev/null and b/hpvm/projects/onnx/profile/prof.nvvp differ
diff --git a/hpvm/projects/onnx/setup.py b/hpvm/projects/onnx/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bcdce7a1d126e0c792bd3bf49026cb61b39f5c1
--- /dev/null
+++ b/hpvm/projects/onnx/setup.py
@@ -0,0 +1,12 @@
+
+from setuptools import setup
+# Packaging metadata for the HPVM frontend modules.
+setup(
+    name='frontend',
+    version='1.0',
+    description='HPVM frontend modules',
+    author='Yuanjing Shi',
+    author_email='ys26@illinois.edu',
+    packages=['frontend'],  # must match the package directory name
+    install_requires=[],
+)
diff --git a/hpvm/projects/onnx/specs.dump b/hpvm/projects/onnx/specs.dump
new file mode 100644
index 0000000000000000000000000000000000000000..3016325b9bb2f94c3d66f0185408cccfa7cf9a43
--- /dev/null
+++ b/hpvm/projects/onnx/specs.dump
@@ -0,0 +1,1051 @@
+
+Computer
+********
+
+
+Summary
+-------
+
+-Computer-
+Processor		: 40x Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz
+Memory		: 65830MB (28703MB used)
+Operating System		: Ubuntu 16.04.6 LTS
+User Name		: ys26 (Yuanjing Shi)
+Date/Time		: Tue 31 Mar 2020 07:33:16 PM CDT
+-Display-
+Resolution		: 0x0 pixels
+OpenGL Renderer		: Unknown
+X11 Vendor		: (null)
+-Multimedia-
+Audio Adapter		: HDA-Intel - HDA Intel PCH
+Audio Adapter		: HDA-Intel - HDA NVidia
+Audio Adapter		: HDA-Intel - HDA NVidia
+-Input Devices-
+ Power Button
+ Power Button
+ HDA Intel PCH Front Mic
+ HDA Intel PCH Rear Mic
+ HDA Intel PCH Line
+ HDA Intel PCH Line Out Front
+ HDA Intel PCH Line Out Surround
+ HDA Intel PCH Line Out CLFE
+ HDA Intel PCH Front Headphone
+ HDA NVidia HDMI/DP,pcm		: 3=
+ HDA NVidia HDMI/DP,pcm		: 7=
+ HDA NVidia HDMI/DP,pcm		: 8=
+ HDA NVidia HDMI/DP,pcm		: 9=
+ HDA NVidia HDMI/DP,pcm		: 3=
+ HDA NVidia HDMI/DP,pcm		: 7=
+ HDA NVidia HDMI/DP,pcm		: 8=
+ HDA NVidia HDMI/DP,pcm		: 9=
+ Dell Dell USB Keyboard
+ Logitech USB-PS/2 Optical Mouse
+-Printers-
+No printers found
+-SCSI Disks-
+ATA Samsung SSD 850
+ATA Samsung SSD 850
+Generic Ultra HS-SD/MMC
+
+Operating System
+----------------
+
+-Version-
+Kernel		: Linux 4.15.0-66-generic (x86_64)
+Compiled		: #75~16.04.1-Ubuntu SMP Tue Oct 1 14:01:08 UTC 2019
+C Library		: Unknown
+Default C Compiler		: GNU C Compiler version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.11) 
+Distribution		: Ubuntu 16.04.6 LTS
+-Current Session-
+Computer Name		: tyler
+User Name		: ys26 (Yuanjing Shi)
+Home Directory		: /home/ys26
+Desktop Environment		: Terminal
+-Misc-
+Uptime		: 67 days, 16 hours and 1 minute
+Load Average		: 19.85, 20.81, 20.51
+
+Kernel Modules
+--------------
+
+-Loaded Modules-
+nvidia_uvm
+nfsv3
+nfs_acl
+nfs
+lockd		: NFS file locking service version 0.5.
+grace
+fscache		: FS Cache Manager
+nvram
+video		: ACPI Video Driver
+msr		: x86 generic MSR driver
+xt_tcpudp		: Xtables: TCP, UDP and UDP-Lite match
+iptable_filter		: iptables filter table
+ip_tables		: IPv4 packet filter
+x_tables		: {ip,ip6,arp,eb}_tables backend module
+input_leds		: Input -&gt; LEDs Bridge
+snd_hda_codec_hdmi		: HDMI HD-audio codec
+binfmt_misc
+xfs		: SGI XFS with ACLs, security attributes, realtime, no debug enabled
+nvidia_drm
+nvidia_modeset
+nvidia
+intel_rapl		: Driver for Intel RAPL (Running Average Power Limit)
+sb_edac		: MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers -  Ver: 1.1.2
+x86_pkg_temp_thermal		: X86 PKG TEMP Thermal Driver
+intel_powerclamp		: Package Level C-state Idle Injection for Intel CPUs
+coretemp		: Intel Core temperature monitor
+snd_hda_codec_realtek		: Realtek HD-audio codec
+kvm_intel
+snd_hda_codec_generic		: Generic HD-audio codec parser
+kvm
+snd_hda_intel		: Intel HDA driver
+snd_hda_codec		: HDA codec core
+snd_hda_core		: HD-audio bus
+snd_hwdep		: Hardware dependent layer
+irqbypass		: IRQ bypass manager utility module
+snd_pcm		: Midlevel PCM code for ALSA.
+intel_wmi_thunderbolt		: Intel WMI Thunderbolt force power driver
+intel_cstate
+snd_seq_midi		: Advanced Linux Sound Architecture sequencer MIDI synth.
+intel_rapl_perf
+snd_seq_midi_event		: MIDI byte &lt;-&gt; sequencer event coder
+drm_kms_helper		: DRM KMS helper
+snd_rawmidi		: Midlevel RawMidi code for ALSA.
+drm		: DRM shared core routines
+snd_seq		: Advanced Linux Sound Architecture sequencer.
+snd_seq_device		: ALSA sequencer device management
+ipmi_devintf		: Linux device interface for the IPMI message handler.
+snd_timer		: ALSA timer interface
+ipmi_msghandler		: Incoming and outgoing message routing for an IPMI interface.
+fb_sys_fops		: Generic file read (fb in system RAM)
+snd		: Advanced Linux Sound Architecture driver for soundcards.
+syscopyarea		: Generic copyarea (sys-to-sys)
+sysfillrect		: Generic fill rectangle (sys-to-sys)
+sysimgblt		: 1-bit/8-bit to 1-32 bit color expansion (sys-to-sys)
+lpc_ich		: LPC interface for Intel ICH
+soundcore		: Core sound module
+ioatdma
+shpchp		: Standard Hot Plug PCI Controller Driver
+mac_hid
+ib_iser		: iSER (iSCSI Extensions for RDMA) Datamover
+rdma_cm		: Generic RDMA CM Agent
+iw_cm		: iWARP CM
+ib_cm		: InfiniBand CM
+ib_core		: core kernel InfiniBand API
+iscsi_tcp		: iSCSI/TCP data-path
+libiscsi_tcp		: iSCSI/TCP data-path
+libiscsi		: iSCSI library functions
+scsi_transport_iscsi		: iSCSI Transport Interface
+parport_pc		: PC-style parallel port driver
+ppdev
+lp
+parport
+sunrpc
+autofs4
+btrfs
+zstd_compress		: Zstd Compressor
+raid10		: RAID10 (striped mirror) personality for MD
+raid456		: RAID4/5/6 (striping with parity) personality for MD
+async_raid6_recov		: asynchronous RAID-6 recovery api
+async_memcpy		: asynchronous memcpy api
+async_pq		: asynchronous raid6 syndrome generation/validation
+async_xor		: asynchronous xor/xor-zero-sum api
+async_tx		: Asynchronous Bulk Memory Transactions API
+xor
+raid6_pq		: RAID6 Q-syndrome calculations
+libcrc32c		: CRC32c (Castagnoli) calculations
+raid1		: RAID1 (mirroring) personality for MD
+raid0		: RAID0 (striping) personality for MD
+multipath		: simple multi-path personality for MD
+linear		: Linear device concatenation personality for MD
+uas
+usb_storage		: USB Mass Storage driver for Linux
+hid_generic		: HID generic driver
+usbhid		: USB HID core driver
+hid
+mxm_wmi		: MXM WMI Driver
+crct10dif_pclmul		: T10 DIF CRC calculation accelerated with PCLMULQDQ.
+crc32_pclmul
+ghash_clmulni_intel		: GHASH Message Digest Algorithm, acclerated by PCLMULQDQ-NI
+pcbc		: PCBC block cipher algorithm
+igb		: Intel(R) Gigabit Ethernet Network Driver
+aesni_intel		: Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized
+dca
+i2c_algo_bit		: I2C-Bus bit-banging algorithm
+aes_x86_64		: Rijndael (AES) Cipher Algorithm, asm optimized
+ahci		: AHCI SATA low-level driver
+crypto_simd
+ptp		: PTP clocks support
+glue_helper
+cryptd		: Software async crypto daemon
+pps_core		: LinuxPPS support (RFC 2783) - ver. 5.3.6
+libahci		: Common AHCI SATA low-level routines
+wmi		: ACPI-WMI Mapping Driver
+
+Boots
+-----
+
+-Boots-
+Fri Jan 24 02:31		: 44..15.0-66-generi|still
+Mon Nov 18 18:04		: 44..15.0-66-generi|still
+Fri Nov 1 1:52		: 44..15.0-66-generi|-
+Fri Nov 1 1:25		: 44..15.0-66-generi|-
+
+Languages
+---------
+
+-Available Languages-
+en_US.utf8		: English locale for the USA
+
+Filesystems
+-----------
+
+-Mounted File Systems-
+udev	/dev	0.00 % (31.4 GiB of 31.4 GiB)	
+tmpfs	/run	7.15 % (5.8 GiB of 6.3 GiB)	
+/dev/mapper/tyler--vg-root	/	5.81 % (1720.1 GiB of 1826.1 GiB)	
+tmpfs	/dev/shm	0.00 % (31.4 GiB of 31.4 GiB)	
+tmpfs	/run/lock	0.08 % (5.0 MiB of 5.0 MiB)	
+tmpfs	/sys/fs/cgroup	0.00 % (31.4 GiB of 31.4 GiB)	
+/dev/sdb1	/srv/local	9.99 % (1676.1 GiB of 1862.1 GiB)	
+/dev/sdb1	/nix	9.99 % (1676.1 GiB of 1862.1 GiB)	
+/dev/sda2	/boot	11.01 % (3.3 GiB of 3.7 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/deepanv2	/home/deepanv2	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/242233	0.00 % (6.3 GiB of 6.3 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/nz11	/home/nz11	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/1091162	0.00 % (6.3 GiB of 6.3 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/hsharif3	/home/hsharif3	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/641600	0.00 % (6.3 GiB of 6.3 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/bjschre2	/home/bjschre2	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/373498	0.00 % (6.3 GiB of 6.3 GiB)	
+/etc/autofs/cs_vadve/auto.direct	/shared	84.56 % (1580.7 GiB of 10238.0 GiB)	
+engr-linux-siebl1.engr.illinois.edu:/srv/software/megacli-8.07	/software/megacli-8.07	95.35 % (92.6 GiB of 1992.1 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/shared	/shared	84.56 % (1580.7 GiB of 10238.0 GiB)	
+engr-linux-siebl1.engr.illinois.edu:/srv/software/cuda-9.1	/software/cuda-9.1	95.35 % (92.6 GiB of 1992.1 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/yifanz16	/home/yifanz16	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/1086025	0.00 % (6.3 GiB of 6.3 GiB)	
+vadve-file-01.cs.illinois.edu:/srv/home/ys26	/home/ys26	84.56 % (1580.7 GiB of 10238.0 GiB)	
+tmpfs	/run/user/1055463	0.00 % (6.3 GiB of 6.3 GiB)	
+
+Display
+-------
+
+-Display-
+Resolution		: 0x0 pixels
+Vendor		: (null)
+Version		: (null)
+-Monitors-
+-Extensions-
+
+Environment Variables
+---------------------
+
+-Environment Variables-
+MANPATH		: /usr/share/lmod/lmod/share/man::
+NIX_PROFILES		: /nix/var/nix/profiles/default /home/ys26/.nix-profile
+XDG_SESSION_ID		: 14482
+TERM		: xterm-256color
+SHELL		: /bin/bash
+DISTARCH		: Linux-x86_64
+MODULEPATH_ROOT		: /etc/modulefiles/software
+SSH_CLIENT		: 10.251.9.206 64268 22
+CONDA_SHLVL		: 1
+CONDA_PROMPT_MODIFIER		: (base) 
+LMOD_PKG		: /usr/share/lmod/lmod
+LMOD_VERSION		: 7.7.14
+GTK_MODULES		: gail:atk-bridge
+SSH_TTY		: /dev/pts/7
+USER		: ys26
+LMOD_sys		: Linux
+CONDA_EXE		: /home/ys26/anaconda3/bin/conda
+_CE_CONDA
+MAIL		: /var/mail/ys26
+PATH		: /home/ys26/anaconda3/bin:/home/ys26/anaconda3/condabin:/home/ys26/.nix-profile/bin:/nix/var/nix/profiles/default/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/ys26/.local/bin:/home/ys26/bin
+CONDA_PREFIX		: /home/ys26/anaconda3
+NIX_PATH		: nixpkgs=/nix/var/nix/profiles/per-user/root/channels/nixpkgs:/nix/var/nix/profiles/per-user/root/channels
+PWD		: /home/ys26
+LANG		: en_US.UTF-8
+MODULEPATH		: /etc/modulefiles/software/Linux:/etc/modulefiles/software/Core:/usr/share/lmod/lmod/modulefiles/Core:/etc/modulefiles/env:/etc/modulefiles/class:/etc/modulefiles/software
+NIX_SSL_CERT_FILE		: /etc/ssl/certs/ca-certificates.crt
+LMOD_CMD		: /usr/share/lmod/lmod/libexec/lmod
+_CE_M
+KRB5CCNAME		: FILE:/tmp/krb5cc_1055463_itHNAb
+SHLVL		: 1
+HOME		: /home/ys26
+LANGUAGE		: en_US:
+SHOST		: tyler
+BASH_ENV		: /usr/share/lmod/lmod/init/bash
+CONDA_PYTHON_EXE		: /home/ys26/anaconda3/bin/python
+LMOD_arch		: x86_64
+LOGNAME		: ys26
+XDG_DATA_DIRS		: /usr/local/share:/usr/share:/var/lib/snapd/desktop
+SOFTPATH		: /software
+SSH_CONNECTION		: 10.251.9.206 64268 130.126.136.179 22
+MODULESHOME		: /usr/share/lmod/lmod
+CONDA_DEFAULT_ENV		: base
+LMOD_SETTARG_FULL_SUPPORT		: no
+ARCH		: x86_64
+XDG_RUNTIME_DIR		: /run/user/1055463
+LMOD_DIR		: /usr/share/lmod/lmod/libexec
+NIX_USER_PROFILE_DIR		: /nix/var/nix/profiles/per-user/ys26
+BASH_FUNC_module%%		: () {  eval $($LMOD_CMD bash "$@") && eval $(${LMOD_SETTARG_CMD:-:} -s sh)
+
+Users
+-----
+
+-Users-
+root		: root
+daemon		: daemon
+bin		: bin
+sys		: sys
+sync		: sync
+games		: games
+man		: man
+lp		: lp
+mail		: mail
+news		: news
+uucp		: uucp
+proxy		: proxy
+www-data		: www-data
+backup		: backup
+list		: Mailing List Manager
+irc		: ircd
+gnats		: Gnats Bug-Reporting System (admin)
+nobody		: nobody
+systemd-timesync		: systemd Time Synchronization
+systemd-network		: systemd Network Management
+systemd-resolve		: systemd Resolver
+systemd-bus-proxy		: systemd Bus Proxy
+syslog
+_apt
+messagebus
+lxd
+dnsmasq		: dnsmasq
+sshd
+statd
+postfix
+colord		: colord colour management daemon
+ntp
+clamav
+sensu		: Sensu Monitoring Framework
+telegraf
+lightdm		: Light Display Manager
+whoopsie
+avahi-autoipd		: Avahi autoip daemon
+avahi		: Avahi mDNS daemon
+speech-dispatcher		: Speech Dispatcher
+hplip		: HPLIP system user
+kernoops		: Kernel Oops Tracking Daemon
+pulse		: PulseAudio daemon
+rtkit		: RealtimeKit
+saned
+usbmux		: usbmux daemon
+nixbld1		: Nix build user 1
+nixbld2		: Nix build user 2
+nixbld3		: Nix build user 3
+nixbld4		: Nix build user 4
+nixbld5		: Nix build user 5
+nixbld6		: Nix build user 6
+nixbld7		: Nix build user 7
+nixbld8		: Nix build user 8
+nixbld9		: Nix build user 9
+nixbld10		: Nix build user 10
+nixbld11		: Nix build user 11
+nixbld12		: Nix build user 12
+nixbld13		: Nix build user 13
+nixbld14		: Nix build user 14
+nixbld15		: Nix build user 15
+nixbld16		: Nix build user 16
+nixbld17		: Nix build user 17
+nixbld18		: Nix build user 18
+nixbld19		: Nix build user 19
+nixbld20		: Nix build user 20
+nixbld21		: Nix build user 21
+nixbld22		: Nix build user 22
+nixbld23		: Nix build user 23
+nixbld24		: Nix build user 24
+nixbld25		: Nix build user 25
+nixbld26		: Nix build user 26
+nixbld27		: Nix build user 27
+nixbld28		: Nix build user 28
+nixbld29		: Nix build user 29
+nixbld30		: Nix build user 30
+nixbld31		: Nix build user 31
+nixbld32		: Nix build user 32
+
+Devices
+*******
+
+
+Processor
+---------
+
+-Processors-
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2599.98MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2602.97MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2456.45MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2601.63MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.07MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.02MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.03MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.19MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2695.21MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2715.75MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2715.46MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1424.73MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1787.70MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2711.77MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1353.80MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2710.79MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2709.20MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2642.89MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2449.84MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.05MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2599.95MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2600.04MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2706.53MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2675.88MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2792.21MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1358.61MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1748.81MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2611.28MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 1373.20MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2705.52MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2736.24MHz
+Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz		: 2702.87MHz
+
+Memory
+------
+
+-Memory-
+Total Memory		: 65830480 kB
+Free Memory		: 14031276 kB
+MemAvailable		: 38320820 kB
+Buffers		: 398312 kB
+Cached		: 23096348 kB
+Cached Swap		: 126052 kB
+Active		: 33001128 kB
+Inactive		: 14469604 kB
+Active(anon)		: 23204468 kB
+Inactive(anon)		: 830232 kB
+Active(file)		: 9796660 kB
+Inactive(file)		: 13639372 kB
+Unevictable		: 3684 kB
+Mlocked		: 3684 kB
+Virtual Memory		: 3997692 kB
+Free Virtual Memory		: 2660556 kB
+Dirty		: 136 kB
+Writeback		: 8 kB
+AnonPages		: 23971776 kB
+Mapped		: 1338980 kB
+Shmem		: 63004 kB
+Slab		: 3122640 kB
+SReclaimable		: 1516712 kB
+SUnreclaim		: 1605928 kB
+KernelStack		: 30624 kB
+PageTables		: 108936 kB
+NFS_Unstable		: 0 kB
+Bounce		: 0 kB
+WritebackTmp		: 0 kB
+CommitLimit		: 36912932 kB
+Committed_AS		: 64285764 kB
+VmallocTotal		: 34359738367 kB
+VmallocUsed		: 0 kB
+VmallocChunk		: 0 kB
+HardwareCorrupted		: 0 kB
+AnonHugePages		: 2934784 kB
+ShmemHugePages		: 0 kB
+ShmemPmdMapped		: 0 kB
+CmaTotal		: 0 kB
+CmaFree		: 0 kB
+HugePages_Total		: 0
+HugePages_Free		: 0
+HugePages_Rsvd		: 0
+HugePages_Surp		: 0
+Hugepagesize		: 2048 kB
+DirectMap4k		: 63504064 kB
+DirectMap2M		: 3469312 kB
+DirectMap1G		: 2097152 kB
+
+PCI Devices
+-----------
+
+-PCI Devices-
+Host bridge		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D DMI2 (rev 01)
+PCI bridge		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 1 (rev 01) (prog-if 00 [Normal decode])
+PCI bridge		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 3 (rev 01) (prog-if 00 [Normal decode])
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 1 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 2 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 3 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 4 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 5 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 6 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 7 (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Map/VTd_Misc/System Management (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO Hot Plug (rev 01)
+System peripheral		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO RAS/Control Status/Global Errors (rev 01)
+PIC		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D I/O APIC (rev 01) (prog-if 20 [IO(X)-APIC])
+
+USB Devices
+-----------
+
+
+Printers
+--------
+
+-Printers-
+No printers found
+
+Battery
+-------
+
+-No batteries-
+No batteries found on this system
+
+Sensors
+-------
+
+-Cooling Fans-
+-Temperatures-
+-Voltage Values-
+
+Input Devices
+-------------
+
+-Input Devices-
+ Power Button
+ Power Button
+ HDA Intel PCH Front Mic
+ HDA Intel PCH Rear Mic
+ HDA Intel PCH Line
+ HDA Intel PCH Line Out Front
+ HDA Intel PCH Line Out Surround
+ HDA Intel PCH Line Out CLFE
+ HDA Intel PCH Front Headphone
+ HDA NVidia HDMI/DP,pcm		: 3=
+ HDA NVidia HDMI/DP,pcm		: 7=
+ HDA NVidia HDMI/DP,pcm		: 8=
+ HDA NVidia HDMI/DP,pcm		: 9=
+ HDA NVidia HDMI/DP,pcm		: 3=
+ HDA NVidia HDMI/DP,pcm		: 7=
+ HDA NVidia HDMI/DP,pcm		: 8=
+ HDA NVidia HDMI/DP,pcm		: 9=
+ Dell Dell USB Keyboard
+ Logitech USB-PS/2 Optical Mouse
+
+Storage
+-------
+
+-SCSI Disks-
+ATA Samsung SSD 850
+ATA Samsung SSD 850
+Generic Ultra HS-SD/MMC
+
+DMI
+---
+
+-BIOS-
+Date		: 11/09/2016
+Vendor		: American Megatrends Inc. (www.ami.com)
+Version		: 2.0a
+-Board-
+Name		: X10DAI
+Vendor		: Supermicro
+
+Resources
+---------
+
+-I/O Ports-
+<tt>0000-0000 </tt>		: PCI Bus 0000:80
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>0000-0000 </tt>		: PCI Bus 0000:80
+<tt>0000-0000 </tt>		: PCI Bus 0000:80
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+<tt>0000-0000 </tt>		: PCI Bus 0000:80
+<tt>  0000-0000 </tt>		: PCI Bus 0000:81
+<tt>    0000-0000 </tt>		: NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller])
+-Memory-
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>      00000000-00000000 </tt>		: ICH HD audio
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>00000000-00000000 </tt>		: PCI Bus 0000:80
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+<tt>  00000000-00000000 </tt>		: Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01)
+<tt>    00000000-00000000 </tt>		: ioatdma
+-DMA-
+<tt> 4</tt>		: cascade
+
+Network
+*******
+
+
+Interfaces
+----------
+
+-Network Interfaces-
+eth0	4338573.20MiB	13789563.94MiB	130.126.136.179	
+eth1	0.00MiB	0.00MiB		
+lo	4132.02MiB	4132.02MiB	127.0.0.1	
+
+IP Connections
+--------------
+
+-Connections-
+0.0.0.0:22	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:3030		0.0.0.0:*	udp	
+127.0.0.1:631	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:3031	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:8888	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:25	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:6010	ESTABLISHED	127.0.0.1:59498	tcp	
+0.0.0.0:445	LISTEN	0.0.0.0:*	tcp	
+127.0.0.1:34409	LISTEN	0.0.0.0:*	tcp	
+0.0.0.0:139	LISTEN	0.0.0.0:*	tcp	
+0.0.0.0:47279	LISTEN	0.0.0.0:*	tcp	
+0.0.0.0:111		0.0.0.0:*	udp	
+0.0.0.0:39059	LISTEN	0.0.0.0:*	tcp	
+127.0.1.1:53		0.0.0.0:*	udp	
+127.0.0.1:6010	ESTABLISHED	127.0.0.1:59498	tcp	
+127.0.0.1:59498	ESTABLISHED	127.0.0.1:6010	tcp	
+130.126.136.179:765	ESTABLISHED	172.22.98.29:33920	tcp	
+130.126.136.179:40932	ESTABLISHED	172.22.246.11:4505	tcp	
+130.126.136.179:789	ESTABLISHED	130.126.112.49:2049	tcp	
+130.126.136.179:58444	ESTABLISHED	172.22.6.74:5672	tcp	
+130.126.136.179:37674	ESTABLISHED	172.22.6.146:8086	tcp	
+130.126.136.179:58446	ESTABLISHED	172.22.6.74:5672	tcp	
+130.126.136.179:22	ESTABLISHED	10.251.18.206:65464	tcp	
+130.126.136.179:22	ESTABLISHED	10.251.18.206:65464	tcp	
+130.126.136.179:45112	TIME_WAIT	172.22.98.29:111	tcp	
+130.126.136.179:58442	ESTABLISHED	172.22.6.74:5672	tcp	
+130.126.136.179:33326	ESTABLISHED	172.22.246.13:4505	tcp	
+130.126.136.179:59164	ESTABLISHED	172.22.246.140:4505	tcp	
+130.126.136.179:22	ESTABLISHED	10.251.18.206:65464	tcp	
+130.126.136.179:58440	ESTABLISHED	172.22.6.74:5672	tcp	
+130.126.136.179:745	ESTABLISHED	172.22.98.29:2049	tcp	
+130.126.136.179:51970	ESTABLISHED	172.22.246.14:10514	tcp	
+130.126.136.179:22	ESTABLISHED	10.251.18.206:65464	tcp	
+:::22	LISTEN	:::*	tcp6	
+::1:631	LISTEN	:::*	tcp6	
+::1:6010	LISTEN	:::*	tcp6	
+:::445	LISTEN	:::*	tcp6	
+:::44513	LISTEN	:::*	tcp6	
+:::139	LISTEN	:::*	tcp6	
+:::111		:::*	udp6	
+:::35349	LISTEN	:::*	tcp6	
+0.0.0.0:59373		0.0.0.0:*	udp	
+127.0.1.1:53		0.0.0.0:*	udp	
+0.0.0.0:68		0.0.0.0:*	udp	
+0.0.0.0:111		0.0.0.0:*	udp	
+130.126.136.179:123		0.0.0.0:*	udp	
+127.0.0.1:123		0.0.0.0:*	udp	
+0.0.0.0:123		0.0.0.0:*	udp	
+130.126.139.255:137		0.0.0.0:*	udp	
+130.126.136.179:137		0.0.0.0:*	udp	
+0.0.0.0:137		0.0.0.0:*	udp	
+130.126.139.255:138		0.0.0.0:*	udp	
+130.126.136.179:138		0.0.0.0:*	udp	
+0.0.0.0:138		0.0.0.0:*	udp	
+0.0.0.0:631		0.0.0.0:*	udp	
+127.0.0.1:762		0.0.0.0:*	udp	
+0.0.0.0:773		0.0.0.0:*	udp	
+0.0.0.0:34156		0.0.0.0:*	udp	
+0.0.0.0:34688		0.0.0.0:*	udp	
+127.0.0.1:3030		0.0.0.0:*	udp	
+0.0.0.0:37696		0.0.0.0:*	udp	
+0.0.0.0:5353		0.0.0.0:*	udp	
+0.0.0.0:42410		0.0.0.0:*	udp	
+:::111		:::*	udp6	
+2620:0:e00:550a:5c4:123		:::*	udp6	
+2620:0:e00:550a:4cd:123		:::*	udp6	
+2620:0:e00:550a:f5e:123		:::*	udp6	
+2620:0:e00:550a:c5b:123		:::*	udp6	
+2620:0:e00:550a:cd1:123		:::*	udp6	
+2620:0:e00:550a:dcf:123		:::*	udp6	
+2620:0:e00:550a:d8d:123		:::*	udp6	
+fe80::ec4:7aff:fedc:123		:::*	udp6	
+2620:0:e00:550a:ec4:123		:::*	udp6	
+::1:123		:::*	udp6	
+:::123		:::*	udp6	
+:::773		:::*	udp6	
+:::37708		:::*	udp6	
+:::5353		:::*	udp6	
+:::39262		:::*	udp6	
+:::47288		:::*	udp6	
+:::55076		:::*	udp6	
+
+Routing Table
+-------------
+
+-IP routing table-
+0.0.0.0 / 130.126.136.1	0.0.0.0	UG	eth0	
+130.126.136.0 / 0.0.0.0	255.255.252.0	U	eth0	
+169.254.0.0 / 0.0.0.0	255.255.0.0	U	eth0	
+192.17.2.10 / 130.126.136.1	255.255.255.255	UGH	eth0	
+
+ARP Table
+---------
+
+-ARP Table-
+130.126.136.121	90:b1:1c:2d:31:0e	eth0	
+130.126.136.198	40:a8:f0:47:38:97	eth0	
+130.126.136.1	00:fe:c8:5d:df:ff	eth0	
+130.126.136.110	98:10:e8:f3:a2:4f	eth0	
+130.126.136.247	00:11:32:c3:57:3f	eth0	
+130.126.136.159	c4:34:6b:5e:26:eb	eth0	
+130.126.136.243	00:4e:01:bf:b6:ea	eth0	
+130.126.136.77	bc:30:5b:e0:f9:06	eth0	
+130.126.139.44	98:90:96:dc:55:d9	eth0	
+130.126.136.14	4c:d9:8f:17:1c:e2	eth0	
+130.126.136.73	18:66:da:31:06:6b	eth0	
+130.126.136.63	18:66:da:3d:e2:68	eth0	
+169.254.169.254	00:fe:c8:5d:df:ff	eth0	
+130.126.136.126	20:04:0f:f4:66:74	eth0	
+130.126.139.118	70:85:c2:83:33:0b	eth0	
+130.126.136.210	2c:4d:54:46:4f:ed	eth0	
+130.126.136.195	48:4d:7e:f5:03:b2	eth0	
+130.126.136.55	40:a8:f0:5e:b9:9f	eth0	
+130.126.136.118	90:b1:1c:1a:02:d6	eth0	
+130.126.136.156	04:d4:c4:5b:50:f4	eth0	
+130.126.136.240	e4:54:e8:79:44:68	eth0	
+130.126.136.162	88:51:fb:58:dc:b2	eth0	
+130.126.136.148	6c:2b:59:d5:f9:d2	eth0	
+130.126.136.11	8c:dc:d4:29:8d:28	eth0	
+130.126.136.74	c8:2a:14:1e:91:67	eth0	
+130.126.138.79	74:e6:e2:da:8b:93	eth0	
+130.126.136.7	18:03:73:3c:2e:f1	eth0	
+130.126.136.129	00:25:00:ed:73:7d	eth0	
+130.126.136.245	00:04:4b:dd:43:42	eth0	
+130.126.136.119	90:b1:1c:2d:2a:90	eth0	
+130.126.136.182	90:b1:1c:5e:2c:72	eth0	
+130.126.136.241	e4:54:e8:79:52:be	eth0	
+130.126.136.100	64:00:6a:5f:2e:25	eth0	
+130.126.139.197	6c:3b:e5:14:6c:8f	eth0	
+130.126.136.12	54:bf:64:99:59:02	eth0	
+130.126.136.23	8c:ec:4b:b2:4f:c9	eth0	
+130.126.136.201	fc:aa:14:2f:a7:ab	eth0	
+130.126.136.191	40:6c:8f:24:b1:a4	eth0	
+130.126.138.206	18:66:da:0d:6f:d0	eth0	
+130.126.136.120	90:b1:1c:2d:2e:f8	eth0	
+130.126.136.46	48:4d:7e:e6:a4:16	eth0	
+130.126.138.135	38:c9:86:02:0b:99	eth0	
+130.126.136.105	d0:94:66:7b:67:c0	eth0	
+130.126.136.221	f8:b1:56:c6:32:86	eth0	
+130.126.136.95	00:24:e8:44:aa:a7	eth0	
+130.126.136.28	ac:1f:6b:48:8a:84	eth0	
+130.126.136.242	00:4e:01:bf:6c:ca	eth0	
+130.126.136.76	18:66:da:00:bb:74	eth0	
+130.126.136.97	98:90:96:bc:6f:66	eth0	
+130.126.136.160	ac:87:a3:1f:4e:ca	eth0	
+130.126.136.9	6c:2b:59:c7:ad:19	eth0	
+130.126.136.72	50:9a:4c:59:c3:5a	eth0	
+130.126.136.209	18:66:da:30:64:45	eth0	
+130.126.136.83	00:13:3b:11:6a:a1	eth0	
+130.126.136.5	4c:d9:8f:0c:1e:02	eth0	
+
+DNS Servers
+-----------
+
+-Name servers-
+127.0.1.1
+
+Statistics
+----------
+
+-IP-
+3340235186		: Total packets received
+99269		: With invalid addresses
+0		: Incoming packets discarded
+0		: Incoming packets discarded
+3340132854		: Incoming packets delivered
+1109643438		: Requests sent out
+16		: Outgoing packets dropped
+248929		: Dropped because of missing route
+6126		: Reassemblies required
+3063		: Packets reassembled ok
+-ICMP-
+1355		: ICMP messages received
+19		: Input ICMP message failed.
+7599		: ICMP messages sent
+0		: ICMP messages failed
+-ICMPMSG-
+-TCP-
+26139644		: Active connections openings
+3919		: Passive connection openings
+25871410		: Failed connection attempts
+4562		: Connection resets received
+18		: Connections established
+3302346907		: Segments received
+10359040645		: Segments send out
+2844431		: Segments retransmited
+1		: Bad segments received.
+25878380		: Resets sent
+-UDP-
+24979118		: Packets received
+6362		: Packets to unknown port received.
+4203		: Packet receive errors
+662177		: Packets sent
+-UDPLITE-
+-TCPEXT-
+1		: Resets received for embryonic SYN_RECV sockets
+235858		: TCP sockets finished time wait in fast timer
+171		: Time wait sockets recycled by time stamp
+1825		: Packets rejects in established connections because of timestamp
+6984717		: Delayed acks sent
+3194		: Delayed acks further delayed because of locked socket
+527487290		: Packet headers predicted
+543016884		: Acknowledgments not containing data payload received
+1893599141		: Predicted acknowledgments
+554129		: Times recovered from packet loss by selective acknowledgements
+119		: Congestion windows fully recovered without slow start
+85		: Congestion windows partially recovered using Hoe heuristic
+6456		: Congestion windows recovered without slow start by DSACK
+1683		: Congestion windows recovered without slow start after partial ack
+101		: Timeouts after SACK recovery
+136		: Timeouts in loss state
+2738885		: Fast retransmits
+2730		: Retransmits in slow start
+3141		: Other TCP timeouts
+820		: SACK retransmits failed
+32589		: DSACKs sent for old packets
+607		: DSACKs sent for out of order packets
+84876		: DSACKs received
+567		: DSACKs for out of order packets received
+650		: Connections reset due to unexpected data
+4384		: Connections reset due to early user close
+286		: Connections aborted due to timeout
+3		: Times unabled to send RST due to no memory
+-IPEXT-
+
+Shared Directories
+------------------
+
+-SAMBA-
+-NFS-
+
+Benchmarks
+**********
+
+
+CPU Blowfish
+------------
+
+-CPU Blowfish-
+<big><b>This Machine</b></big>	2600 MHz	0.665	
+Intel(R) Celeron(R) M processor         1.50GHz	(null)	26.1876862	
+PowerPC 740/750 (280.00MHz)	(null)	172.816713	
+
+CPU CryptoHash
+--------------
+
+-CPU CryptoHash-
+<big><b>This Machine</b></big>	2600 MHz	1697.719	
+
+CPU Fibonacci
+-------------
+
+-CPU Fibonacci-
+<big><b>This Machine</b></big>	2600 MHz	2.166	
+Intel(R) Celeron(R) M processor         1.50GHz	(null)	8.1375674	
+PowerPC 740/750 (280.00MHz)	(null)	58.07682	
+
+CPU N-Queens
+------------
+
+-CPU N-Queens-
+<big><b>This Machine</b></big>	2600 MHz	0.592	
+
+FPU FFT
+-------
+
+-FPU FFT-
+<big><b>This Machine</b></big>	2600 MHz	1.072	
+
+FPU Raytracing
+--------------
+
+-FPU Raytracing-
+<big><b>This Machine</b></big>	2600 MHz	6.178	
+Intel(R) Celeron(R) M processor         1.50GHz	(null)	40.8816714	
+PowerPC 740/750 (280.00MHz)	(null)	161.312647	
diff --git a/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb b/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..bbac5d491d80f5f4aaa9286b2323704e55555b48
--- /dev/null
+++ b/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import numpy as np\n",
+    "import onnx\n",
+    "import glob\n",
+    "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n",
+    "\n",
+    "from onnx import numpy_helper\n",
+    "\n",
+    "# onnx2hpvm modules\n",
+    "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = onnx.load('../models/mnist/mnist.onnx')\n",
+    "test_data_dir = '../models/mnist/test_data_set_0'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load inputs\n",
+    "inputs = []\n",
+    "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n",
+    "print(inputs_num)\n",
+    "for i in range(inputs_num):\n",
+    "    input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n",
+    "    tensor = onnx.TensorProto()\n",
+    "    with open(input_file, 'rb') as f:\n",
+    "        tensor.ParseFromString(f.read())\n",
+    "    inputs.append(numpy_helper.to_array(tensor))\n",
+    "\n",
+    "# Load reference outputs\n",
+    "ref_outputs = []\n",
+    "ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n",
+    "for i in range(ref_outputs_num):\n",
+    "    output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n",
+    "    tensor = onnx.TensorProto()\n",
+    "    with open(output_file, 'rb') as f:\n",
+    "        tensor.ParseFromString(f.read())\n",
+    "    ref_outputs.append(numpy_helper.to_array(tensor))\n",
+    "\n",
+    "# Run the model on the backend\n",
+    "outputs = list(backend.run_model(model, inputs))\n",
+    "\n",
+    "#from_onnx_to_hpvm(model)\n",
+    "# Compare the results with reference outputs.\n",
+    "#for ref_o, o in zip(ref_outputs, outputs):\n",
+    "#    np.testing.assert_almost_equal(ref_o, o)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/alexnet_keras.ipynb b/hpvm/projects/onnx/src/alexnet_keras.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..3e46e220eb5438188d19e06934c6a9ef780cfa21
--- /dev/null
+++ b/hpvm/projects/onnx/src/alexnet_keras.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers.core import Dense, Dropout, Flatten, Activation\n",
+    "from keras.layers.convolutional import Conv2D\n",
+    "from keras.optimizers import Adam\n",
+    "from keras.layers.pooling import MaxPooling2D\n",
+    "from keras.utils.np_utils import to_categorical\n",
+    "from keras.preprocessing.image import ImageDataGenerator\n",
+    "from keras import backend as K\n",
+    "from keras import regularizers\n",
+    "from keras.callbacks import LearningRateScheduler\n",
+    "import sys\n",
+    "import struct\n",
+    "import keras\n",
+    "import numpy as np\n",
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def buildModel2():\n",
+    "\n",
+    "    activation_type = \"tanh\"\n",
+    "    weight_decay = 1e-4\n",
+    "    \n",
+    "    model = Sequential()\n",
+    "    model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type,\n",
+    "                     input_shape=(3, 32, 32), padding = 'same',\n",
+    "                     kernel_regularizer=regularizers.l2(weight_decay) ))\n",
+    "    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n",
+    "    model.add(Dropout(0.2))\n",
+    "    model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same',\n",
+    "                     kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n",
+    "    model.add(Dropout(0.3))\n",
+    "\n",
+    "    model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n",
+    "                     kernel_regularizer=regularizers.l2(weight_decay) ))   \n",
+    "    model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n",
+    "                     kernel_regularizer=regularizers.l2(weight_decay) ))\n",
+    "    model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n",
+    "                     kernel_regularizer=regularizers.l2(weight_decay) ))\n",
+    "    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n",
+    "    model.add(Dropout(0.4))\n",
+    "\n",
+    "    model.add(Flatten())\n",
+    "    #model.add(Flatten())\n",
+    "    #model.add(Dense(256))\n",
+    "    model.add(Dense(10))\n",
+    "    model.add(Activation('softmax'))\n",
+    "    \n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def lr_schedule(epoch):\n",
+    "  lrate = 0.001\n",
+    "  if epoch > 20:\n",
+    "    lrate = 0.0005\n",
+    "  if epoch > 40:\n",
+    "    lrate = 0.0003\n",
+    "  if epoch > 60:\n",
+    "    lrate = 0.0001\n",
+    "  if epoch > 80:\n",
+    "    lrate = 0.00005  \n",
+    "    \n",
+    "  return lrate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def trainModel(model):    \n",
+    "    #dir_prefix = \"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/\"\n",
+    "\n",
+    "    #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)\n",
+    "    # Compile the model\n",
+    "    model.compile(loss='categorical_crossentropy',\n",
+    "                  optimizer=Adam(lr=0.0001, decay=1e-6),\n",
+    "                  #optimizer = opt_rms,\n",
+    "                  metrics=['accuracy'])\n",
+    "\n",
+    "    #print to_categorical(Y_train, 10)\n",
+    "    print (to_categorical(Y_train))\n",
+    "\n",
+    "\n",
+    "    datagen = ImageDataGenerator(\n",
+    "        rotation_range=15,\n",
+    "        width_shift_range=0.1,\n",
+    "        height_shift_range=0.1,\n",
+    "        horizontal_flip=True,\n",
+    "        )\n",
+    "    datagen.fit(X_train)\n",
+    "\n",
+    "    \n",
+    "    model.fit(X_train, to_categorical(Y_train, 10),\n",
+    "              batch_size=128,\n",
+    "              shuffle=True,\n",
+    "              epochs = 1,\n",
+    "              #epochs=100,\n",
+    "              validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)])\n",
+    "\n",
+    "    # Evaluate the model\n",
+    "    scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10))\n",
+    "\n",
+    "    print('Loss: %.3f' % scores[0])\n",
+    "    print('Accuracy: %.3f' % scores[1])\n",
+    "        \n",
+    "    print (\"*** TRAINED MODEL ****\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n",
+    "test_labels = Y_test\n",
+    "train_labels = Y_train\n",
+    "\n",
+    "#X_train = X_train.astype('float32')\n",
+    "#X_test = X_test.astype('float32')\n",
+    "X_train = X_train / 255.0\n",
+    "X_test = X_test / 255.0\n",
+    "\n",
+    "mean = np.mean(X_train,axis=(0,1,2,3))\n",
+    "std = np.std(X_train,axis=(0,1,2,3))   \n",
+    "X_train = (X_train-mean)/(std+1e-7)\n",
+    "X_test = (X_test-mean)/(std+1e-7)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train = X_train[:8000]\n",
+    "X_test = X_test[:8000]\n",
+    "Y_train = Y_train[:8000]\n",
+    "Y_test = Y_test[:8000]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0. 0. 0. ... 0. 0. 0.]\n",
+      " [0. 0. 0. ... 0. 0. 1.]\n",
+      " [0. 0. 0. ... 0. 0. 1.]\n",
+      " ...\n",
+      " [0. 0. 0. ... 0. 0. 1.]\n",
+      " [0. 1. 0. ... 0. 0. 0.]\n",
+      " [0. 1. 0. ... 0. 0. 0.]]\n",
+      "WARNING:tensorflow:Variable *= will be deprecated. Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n",
+      "Train on 50000 samples, validate on 10000 samples\n",
+      "Epoch 1/1\n",
+      "50000/50000 [==============================] - 12s 232us/step - loss: 2.0118 - acc: 0.3704 - val_loss: 1.8003 - val_acc: 0.4312\n",
+      "10000/10000 [==============================] - 1s 150us/step\n",
+      "Loss: 1.800\n",
+      "Accuracy: 0.431\n",
+      "*** TRAINED MODEL ****\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = buildModel2()\n",
+    "trainModel(model)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time: 1.0208089351654053\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "start = time.time()\n",
+    "keras_result = model.predict(X_test[:8000])\n",
+    "print(\"time:\", time.time() - start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras2onnx\n",
+    "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n",
+    "import onnx\n",
+    "onnx.save(onnx_model, \"../models/keras/alexnet8k.onnx\")\n",
+    "import pickle\n",
+    "with open('keras_dump', 'wb') as fp:\n",
+    "    pickle.dump(keras_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#onnx_model = keras2onnx.convert_keras(model, model.name)\n",
+    "import onnxruntime\n",
+    "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_name = sess.get_inputs()[0].name\n",
+    "print(\"Input name  :\", input_name)\n",
+    "input_shape = sess.get_inputs()[0].shape\n",
+    "print(\"Input shape :\", input_shape)\n",
+    "input_type = sess.get_inputs()[0].type\n",
+    "print(\"Input type  :\", input_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_name = sess.get_outputs()[0].name\n",
+    "print(\"Output name  :\", output_name)  \n",
+    "output_shape = sess.get_outputs()[0].shape\n",
+    "print(\"Output shape :\", output_shape)\n",
+    "output_type = sess.get_outputs()[0].type\n",
+    "print(\"Output type  :\", output_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "#K.clear_session()\n",
+    "start = time.time()\n",
+    "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')})\n",
+    "import pickle\n",
+    "\n",
+    "with open('dumps/ort_dump', 'wb') as fp:\n",
+    "    pickle.dump(ort_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('ort_dump', 'rb') as fp:\n",
+    "    ort_res = pickle.load(fp)\n",
+    "with open ('keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(ort_res[0], keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 5)  #using decimal of 3 would pass test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(ort_res[0])\n",
+    "print(\"--------------\")\n",
+    "print(keras_res)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/alexnet_onnx.ipynb b/hpvm/projects/onnx/src/alexnet_onnx.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..73755ecf1ccc27eedc5f1319c20e30473936c3e8
--- /dev/null
+++ b/hpvm/projects/onnx/src/alexnet_onnx.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import keras2onnx\n",
+    "import onnx\n",
+    "import onnxruntime\n",
+    "import time\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n",
+    "test_labels = Y_test\n",
+    "train_labels = Y_train\n",
+    "\n",
+    "#X_train = X_train.astype('float32')\n",
+    "#X_test = X_test.astype('float32')\n",
+    "X_train = X_train / 255.0\n",
+    "X_test = X_test / 255.0\n",
+    "\n",
+    "mean = np.mean(X_train,axis=(0,1,2,3))\n",
+    "std = np.std(X_train,axis=(0,1,2,3))   \n",
+    "X_train = (X_train-mean)/(std+1e-7)\n",
+    "X_test = (X_test-mean)/(std+1e-7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input name  : conv2d_1_input\n",
+      "Input shape : [None, 3, 32, 32]\n",
+      "Input type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = sess.get_inputs()[0].name\n",
+    "print(\"Input name  :\", input_name)\n",
+    "input_shape = sess.get_inputs()[0].shape\n",
+    "print(\"Input shape :\", input_shape)\n",
+    "input_type = sess.get_inputs()[0].type\n",
+    "print(\"Input type  :\", input_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Output name  : activation_1\n",
+      "Output shape : [None, 10]\n",
+      "Output type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "output_name = sess.get_outputs()[0].name\n",
+    "print(\"Output name  :\", output_name)  \n",
+    "output_shape = sess.get_outputs()[0].shape\n",
+    "print(\"Output shape :\", output_shape)\n",
+    "output_type = sess.get_outputs()[0].type\n",
+    "print(\"Output type  :\", output_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time:  1.3165459632873535\n"
+     ]
+    }
+   ],
+   "source": [
+    "#X_test = np.moveaxis(X_test, -1, 1)\n",
+    "X_test.shape\n",
+    "#K.clear_session()\n",
+    "start_time = time.time()\n",
+    "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]})\n",
+    "print(\"time: \", time.time() - start_time)\n",
+    "import pickle\n",
+    "\n",
+    "with open('dumps/ort_dump', 'wb') as fp:\n",
+    "    pickle.dump(ort_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: 'dumps/keras_dump'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-20-a59ffee1b5dd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/ort_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mort_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/keras_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0mkeras_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'dumps/keras_dump'"
+     ]
+    }
+   ],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/ort_dump', 'rb') as fp:\n",
+    "    ort_res = pickle.load(fp)\n",
+    "with open ('dumps/keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(ort_res[0], keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/alexnet_tvm.ipynb b/hpvm/projects/onnx/src/alexnet_tvm.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..eaa92800e5ef55e1de64cbbde9395872727a4e5b
--- /dev/null
+++ b/hpvm/projects/onnx/src/alexnet_tvm.ipynb
@@ -0,0 +1,225 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "import sys\n",
+    "import struct\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import tvm\n",
+    "import tvm.relay as relay\n",
+    "from tvm.contrib import graph_runtime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_last')\n",
+    "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n",
+    "X_test = X_test / 255.0\n",
+    "mean = np.mean(X_train,axis=(0,1,2,3))\n",
+    "std = np.std(X_train,axis=(0,1,2,3))   \n",
+    "X_test = (X_test-mean)/(std+1e-7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(X_test.shape)\n",
+    "#X_test = X_test[:8000]\n",
+    "#print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import onnx\n",
+    "onnx_model = onnx.load(\"../models/keras/alexnet_last.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target='cuda -libs=cudnn,cublas'\n",
+    "input_name = 'conv2d_8_input'\n",
+    "shape_dict = {input_name: X_test[:1000].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "ctx = tvm.gpu()\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    executor = relay.build_module.create_executor('graph', mod, ctx, target)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# LLVM EXECUTE SUCCEEDED\n",
+    "import time\n",
+    "start_time = time.time()\n",
+    "tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')[:1000]), **params)\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n",
+    "import pickle\n",
+    "with open('dumps/tvm_dump', 'wb') as fp:\n",
+    "    pickle.dump(tvm_out.asnumpy(), fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 4096, 'float32'), (10, 4096, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 384, 8, 8, 'float32'), (256, 384, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 192, 8, 8, 'float32'), (384, 192, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 16, 16, 'float32'), (192, 64, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 3, 32, 32, 'float32'), (64, 3, 11, 11, 'float32'), (1, 1), (5, 5), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = 'conv2d_8_input'\n",
+    "input_size = 8000\n",
+    "shape_dict = {input_name: X_test[:input_size].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "target = 'cuda -libs=cudnn,cublas'\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    graph, lib, params = relay.build(mod, target, params=params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TVMError",
+     "evalue": "Traceback (most recent call last):\n  [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n  [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n  [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n  [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n  [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n  [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n  [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n  [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n  File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTVMError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-6-0bd1741a62f4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# create module\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mmodule\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_runtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0;31m# set input and parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"conv2d_8_input\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0minput_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"float32\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/tvm/python/tvm/contrib/graph_runtime.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(graph_json_str, libmod, ctx)\u001b[0m\n\u001b[1;32m     57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0mfcreate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_global_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tvm.graph_runtime.create\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mGraphModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph_json_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mdevice_type_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     61\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_device_ctx\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/tvm/python/tvm/_ffi/_ctypes/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    205\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtcodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    206\u001b[0m                 ctypes.byref(ret_val), ctypes.byref(ret_tcode)) != 0:\n\u001b[0;32m--> 207\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mget_last_ffi_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    208\u001b[0m         \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    209\u001b[0m         \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mTVMError\u001b[0m: Traceback (most recent call last):\n  [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n  [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n  [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n  [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n  [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n  [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n  [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n  [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n  File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "ctx = tvm.gpu()\n",
+    "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n",
+    "# create module\n",
+    "module = graph_runtime.create(graph, lib, ctx)\n",
+    "# set input and parameters\n",
+    "module.set_input(\"conv2d_8_input\", X_test[:input_size].astype(\"float32\"))\n",
+    "module.set_input(**params)\n",
+    "# run\n",
+    "start_time = time.time()\n",
+    "module.run()\n",
+    "out_shape = (input_size, 10)\n",
+    "# get output\n",
+    "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/tvm_dump', 'rb') as fp:\n",
+    "    tvm_res = pickle.load(fp)\n",
+    "with open ('dumps/keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(tvm_res, keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test\n",
+    "print(\"Accuracy matched!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/alexnet_tvm.py b/hpvm/projects/onnx/src/alexnet_tvm.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbbe8c6f8cb5f4e0cb98a467784ded7cfb384d31
--- /dev/null
+++ b/hpvm/projects/onnx/src/alexnet_tvm.py
@@ -0,0 +1,40 @@
+import numpy as np
+from keras.datasets import cifar10
+from keras import backend as K
+import sys
+import struct
+import numpy as np
+import os
+import tvm
+import tvm.relay as relay
+from tvm.contrib import graph_runtime
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+# Run the exported AlexNet ONNX model on the CIFAR-10 test split through TVM Relay.
+
+# The data is handled in channels-last layout for this script.
+K.set_image_data_format('channels_last')
+(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+test_labels = Y_test
+train_labels = Y_train
+
+#X_train = X_train.astype('float32')
+#X_test = X_test.astype('float32')
+# Divide the raw pixel values by 255 before computing the statistics below.
+X_train = X_train / 255.0
+X_test = X_test / 255.0
+
+# Mean and standard deviation are taken over all four axes of the training
+# split (one scalar each) and applied to both splits; 1e-7 guards the division.
+mean = np.mean(X_train,axis=(0,1,2,3))
+std = np.std(X_train,axis=(0,1,2,3))   
+X_train = (X_train-mean)/(std+1e-7)
+X_test = (X_test-mean)/(std+1e-7)
+
+import onnx
+# Path is relative to the current working directory.
+onnx_model = onnx.load("../models/keras/alexnet_last.onnx")
+
+target = tvm.target.cuda()
+# Name of the graph input that the test images are bound to.
+input_name = 'conv2d_8_input'
+# The Relay module is built for the shape of the whole test split.
+shape_dict = {input_name: X_test.shape}
+mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
+ctx = tvm.gpu(0)
+with relay.build_config(opt_level=3):
+    executor = relay.build_module.create_executor('graph', mod, ctx, target)
+    
+# NOTE(review): `dtype` is assigned but the call below spells 'float32' out itself.
+dtype = 'float32'
+# Evaluate the whole test split in one call; the result stays in `tvm_out`.
+tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')), **params)
\ No newline at end of file
diff --git a/hpvm/projects/onnx/src/keras_models/alexnet.py b/hpvm/projects/onnx/src/keras_models/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..b562c7d658969c8efe8dca20a0502475268a7af2
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/alexnet.py
@@ -0,0 +1,174 @@
+
+import numpy as np
+from keras.datasets import cifar10
+from keras.models import Sequential
+from keras.layers.core import Dense, Dropout, Flatten, Activation
+from keras.layers.convolutional import Conv2D
+from keras.optimizers import Adam
+from keras.layers.pooling import MaxPooling2D
+from keras.utils.np_utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras import backend as K
+from keras import regularizers
+from keras.callbacks import LearningRateScheduler
+import sys
+import struct
+import keras
+import numpy as np
+import os
+
+
+
+def lr_schedule(epoch):
+  lrate = 0.001
+  if epoch > 20:
+    lrate = 0.0005
+  if epoch > 40:
+    lrate = 0.0003
+  if epoch > 60:
+    lrate = 0.0001
+  if epoch > 80:
+    lrate = 0.00005  
+    
+  return lrate
+
+
+
+def buildModel2():
+
+    activation_type = "tanh"
+    weight_decay = 1e-4
+    
+    model = Sequential()
+    model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type,
+                     input_shape=(3, 32, 32), padding = 'same',
+                     kernel_regularizer=regularizers.l2(weight_decay) ))
+    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
+    model.add(Dropout(0.2))
+    model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same',
+                     kernel_regularizer=regularizers.l2(weight_decay)))
+    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
+    model.add(Dropout(0.3))
+
+    model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same',
+                     kernel_regularizer=regularizers.l2(weight_decay) ))   
+    model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',
+                     kernel_regularizer=regularizers.l2(weight_decay) ))
+    model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',
+                     kernel_regularizer=regularizers.l2(weight_decay) ))
+    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
+    model.add(Dropout(0.4))
+
+    model.add(Flatten())
+    #model.add(Flatten())
+    #model.add(Dense(256))
+    model.add(Dense(10))
+    model.add(Activation('softmax'))
+    
+    return model
+
+
+
+def buildModel():
+    
+    model = Sequential()
+    model.add(Conv2D(128, kernel_size=(3, 3), activation='tanh', input_shape=(3, 32, 32), padding = 'same'))
+    model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+    #model.add(Dropout(0.25))
+
+    model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+    
+    model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+    model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+    #model.add(Dropout(0.25))
+
+    model.add(Flatten())
+    #model.add(Flatten())
+    model.add(Dense(4096, activation='tanh'))
+    #model.add(Dropout(0.5))
+    model.add(Dense(2048, activation='tanh'))
+    model.add(Dense(10, activation='tanh'))
+    model.add(Activation('softmax'))
+    
+    return model
+
+
+
+
+def trainModel(model):
+
+    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+    test_labels = Y_test
+    train_labels = Y_train
+
+    #X_train = X_train.astype('float32')
+    #X_test = X_test.astype('float32')
+    X_train = X_train / 255.0
+    X_test = X_test / 255.0
+    
+    mean = np.mean(X_train,axis=(0,1,2,3))
+    std = np.std(X_train,axis=(0,1,2,3))   
+    X_train = (X_train-mean)/(std+1e-7)
+    X_test = (X_test-mean)/(std+1e-7)
+    
+    dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/"
+
+    #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)
+    # Compile the model
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=Adam(lr=0.0001, decay=1e-6),
+                  #optimizer = opt_rms,
+                  metrics=['accuracy'])
+
+    #print to_categorical(Y_train, 10)
+    print (to_categorical(Y_train))
+
+
+    datagen = ImageDataGenerator(
+        rotation_range=15,
+        width_shift_range=0.1,
+        height_shift_range=0.1,
+        horizontal_flip=True,
+        )
+    datagen.fit(X_train)
+
+    
+    model.fit(X_train, to_categorical(Y_train, 10),
+              batch_size=128,
+              shuffle=True,
+              epochs = 1,
+              #epochs=100,
+              validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)])
+
+    # Evaluate the model
+    scores = model.evaluate(X_test, to_categorical(Y_test, 10))
+
+    print('Loss: %.3f' % scores[0])
+    print('Accuracy: %.3f' % scores[1])
+        
+    print ("*** TRAINED MODEL ****\n")
+
+    
+    #dumpCalibrationData("calibration_data/alexnet_calib.bin", X_train,
+    #                    "calibration_data/alexnet_train_labels.bin", train_labels)
+    
+    #translate_to_approxhpvm(model, "data/alexnet_cifar10/", X_test, test_labels, 10)
+
+
+    
+# Script entry point: build the network, train it, and export it to ONNX.
+if __name__ == "__main__":
+
+    # Expose only CUDA device 0 to this process.
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+    model = buildModel2()
+    trainModel(model)
+    # The converter packages are imported only here, where they are needed.
+    import keras2onnx
+    onnx_model = keras2onnx.convert_keras(model, model.name)
+    import onnx
+    # Output path is relative to the current working directory.
+    onnx.save(onnx_model, "../models/keras/alexnet.onnx")
diff --git a/hpvm/projects/onnx/src/keras_models/alexnet2.py b/hpvm/projects/onnx/src/keras_models/alexnet2.py
new file mode 100644
index 0000000000000000000000000000000000000000..812b212165666370092a3d55e8482643b550f830
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/alexnet2.py
@@ -0,0 +1,243 @@
+
+import keras
+from keras.models import Sequential
+from keras.utils import np_utils
+from keras.preprocessing.image import ImageDataGenerator
+from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
+from keras.layers import Conv2D, MaxPooling2D
+from keras.datasets import cifar10
+from keras import regularizers
+from keras.callbacks import LearningRateScheduler
+import numpy as np
+import os
+import struct
+from keras import backend as K
+from approxhpvm_translator import translate_to_approxhpvm
+
+
+
+def dumpWeights(file_name, weights, N, H, W, C):
+    # NOTE: Writing the NHWC weights array as NCHW
+    f = open(file_name, "wb")
+    for i in range(N):
+        for j in range(C):
+            for k in range(H):
+                for l in range(W):
+                    f.write(weights[i][k][l][j])
+
+    f.close()
+
+    
+def dumpConvWeights(file_name, weights, N, C, H, W):
+
+    print (weights.shape)
+    
+    f = open(file_name, "wb")
+    for i in range(N):
+        for j in range(C):
+            for k in range(H):
+                for l in range(W):
+                    f.write(weights[k][l][j][i])
+    f.close()
+
+
+    
+def dumpFcWeights(file_name, weights, H, W):
+
+    print (weights.shape)
+
+    f = open(file_name, "wb")
+    for i in range(H):
+        for j in range(W):
+            f.write(weights[i][j])
+    f.close()        
+
+
+def dumpFcBias(file_name, bias, W):
+
+    print (bias.shape)
+
+    f = open(file_name, "wb")
+    for i in range(W):
+        f.write(bias[i])
+    f.close()
+
+
+def dumpLabels(file_name, Y_test):
+
+    f = open(file_name, "wb")
+    
+    labels_map = {}    
+    for label in Y_test:
+        label_val = np.int8(label[0])
+        if label_val not in labels_map:
+            labels_map[label_val] = 0
+        labels_map[label_val] += 1
+
+        f.write(label_val)
+
+    f.close()
+    
+
+    
+def dumpData(X_test, file_name, N, C, H, W):
+
+    print (X_test.shape)
+    
+    f = open(file_name, "wb")
+    for i in range(N):
+        for j in range(C):
+            for k in range(H):
+                for l in range(W):
+                    val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l]))
+                    f.write(np.float32(val[0]))
+
+    f.close()
+
+
+
+    
+
+def lr_schedule(epoch):
+  lrate = 0.001
+  if epoch > 75:
+    lrate = 0.0005
+  if epoch > 100:
+    lrate = 0.0003
+  return lrate
+
+
+def lr_schedule2(epoch):
+  lrate = 0.0005
+  if epoch > 100:
+    lrate = 0.0003
+  if epoch > 200:
+    lrate = 0.0002
+  if epoch > 250:
+    lrate = 0.0001
+  if epoch > 300:
+    lrate = 0.00003
+
+  return lrate
+
+
+# Everything below runs at import time: load CIFAR-10, normalise it, dump the
+# test inputs and labels, and set the hyper-parameters for the model.
+K.set_image_data_format('channels_first')
+
+(x_train, y_train), (x_test, y_test) = cifar10.load_data()
+# Keep the integer labels; y_test is replaced by its one-hot form further down.
+test_labels = y_test
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+
+#z-score
+# Statistics come from the training split only and are applied to both splits;
+# 1e-7 guards the division.
+mean = np.mean(x_train,axis=(0,1,2,3))
+std = np.std(x_train,axis=(0,1,2,3))
+x_train = (x_train-mean)/(std+1e-7)
+x_test = (x_test-mean)/(std+1e-7)
+
+
+# Dumping test data and test labels
+# NOTE(review): absolute, user-specific output directory - must exist on the machine running this.
+dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/"
+
+dumpLabels(dir_prefix + "test_labels.bin", y_test)
+# The extents passed here are hard-coded: 10000 x 3 x 32 x 32.
+dumpData(x_test, dir_prefix + "norm_cifar_input.bin", 10000, 3, 32, 32)
+
+
+
+num_classes = 10
+# Labels are one-hot encoded only after the integer labels were dumped above.
+y_train = np_utils.to_categorical(y_train,num_classes)
+y_test = np_utils.to_categorical(y_test,num_classes)
+
+# L2 regularisation strength and activation used by every convolution below.
+weight_decay = 1e-4
+activation_type = 'tanh'
+
+
+# Expose only CUDA device 0 to this process.
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+model = Sequential()
+model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=x_train.shape[1:]))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(MaxPooling2D(pool_size=(2,2)))
+model.add(Dropout(0.2))
+
+model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(MaxPooling2D(pool_size=(2,2)))
+model.add(Dropout(0.3))
+
+model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+model.add(Activation(activation_type))
+#model.add(BatchNormalization())
+model.add(MaxPooling2D(pool_size=(2,2)))
+model.add(Dropout(0.4))
+
+model.add(Flatten())
+model.add(Dense(num_classes))
+model.add(Activation('softmax'))
+model.summary()
+
+#data augmentation
+# Random rotations, shifts and horizontal flips; the generator feeds fit_generator below.
+datagen = ImageDataGenerator(
+        rotation_range=15,
+        width_shift_range=0.1,
+        height_shift_range=0.1,
+        horizontal_flip=True,
+        )
+
+datagen.fit(x_train)
+
+
+#training
+batch_size = 64
+
+opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)
+model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy'])
+# Trains for a single epoch (the 350-epoch setting is commented out inside the
+# call); lr_schedule2 supplies the per-epoch learning rate.
+model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\
+                    steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\
+                    epochs=1,
+                    verbose=1,validation_data=(x_test,y_test),callbacks=[LearningRateScheduler(lr_schedule2)])
+#save to disk
+# Architecture goes to model.json and weights to model.h5, both in the current working directory.
+model_json = model.to_json()
+with open('model.json', 'w') as json_file:
+  json_file.write(model_json)
+  model.save_weights('model.h5') 
+
+#testing
+# scores[0] is printed as the loss and scores[1] (times 100) as the test result.
+scores = model.evaluate(x_test, y_test, batch_size=128, verbose=1)
+print('\nTest result: %.3f loss: %.3f' % (scores[1]*100,scores[0]))
+
+
+translate_to_approxhpvm(model, "alexnet2_cifar10_test/", x_test, test_labels, "alexnet2_cifar10/", y_test)
+sys.exit(0)
+
+
+
+params = model.get_weights()
+dumpConvWeights(dir_prefix + "conv1.bin", params[0], 32, 3, 3, 3)
+dumpFcBias(dir_prefix + "conv1_bias.bin", params[1], 32)
+dumpConvWeights(dir_prefix + "conv2.bin", params[2], 32, 32, 3, 3)
+dumpFcBias(dir_prefix + "conv2_bias.bin", params[3], 32)
+dumpConvWeights(dir_prefix + "conv3.bin", params[4], 64, 32, 3, 3)
+dumpFcBias(dir_prefix + "conv3_bias.bin", params[5], 64)
+dumpConvWeights(dir_prefix +  "conv4.bin", params[6], 64, 64, 3, 3)
+dumpFcBias(dir_prefix + "conv4_bias.bin", params[7], 64)
+dumpConvWeights(dir_prefix +  "conv5.bin", params[8], 128, 64, 3, 3)
+dumpFcBias(dir_prefix + "conv5_bias.bin", params[9], 128)
+dumpConvWeights(dir_prefix + "conv6.bin", params[10], 128, 128, 3, 3)
+dumpFcBias(dir_prefix + "conv6_bias.bin", params[11], 128)
+
+dumpFcWeights(dir_prefix +  "fc1.bin", params[12], 2048, 10)
+dumpFcBias(dir_prefix +  "fc1_bias.bin", params[13], 10)
+  
+
diff --git a/hpvm/projects/onnx/src/keras_models/lenet.py b/hpvm/projects/onnx/src/keras_models/lenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9588eef6c393457617b7fdda03c7b8222af5357
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/lenet.py
@@ -0,0 +1,97 @@
+
+import sys
+import keras
+from keras.datasets import mnist
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Flatten, Activation
+from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
+from keras import backend as K
+from frontend.approxhpvm_translator import translate_to_approxhpvm
+
+
+# Mini-batch size used for training.
+batch_size = 128
+# Number of output classes.
+num_classes = 10
+
+
+# input image dimensions
+img_rows, img_cols = 28, 28  
+
+
+# Script entry point: train a LeNet-style network on MNIST and pass it to the
+# ApproxHPVM translator.
+if __name__ == "__main__":
+
+    # Changing Keras data format to NCHW - NHWC is default
+    # NOTE: ApproxHPVM requires NCHW format
+    K.set_image_data_format('channels_first')
+
+    # Loads the MNIST dataset
+    (x_train, y_train), (x_test, y_test) = mnist.load_data()
+    # Keep the integer labels; y_test is replaced by its one-hot form below.
+    test_labels = y_test
+
+    # Reshaping data to be NCHW format (a single channel)
+    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
+    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
+    input_shape = (1, img_rows, img_cols)
+
+
+    # Data normalization: cast to float32 and divide by 255
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+    x_train /= 255
+    x_test /= 255
+
+
+    # convert class vectors to binary class matrices - required by Keras
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+    y_test = keras.utils.to_categorical(y_test, num_classes)
+
+
+
+    # Network composition: 3 conv layers, 2 dense layers
+    model = Sequential()
+
+    # ConvLayer1
+    model.add(Conv2D(32, kernel_size=(5, 5),
+                     activation='relu',
+                     padding = 'same',
+                     input_shape=input_shape))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    # ConvLayer2
+    model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same'))
+
+    # ConvLayer3
+    # NOTE: ZeroPadding needed for ConvLayer with strides > 1
+    model.add(ZeroPadding2D(padding = (1,1)))
+    model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') )
+
+    model.add(Flatten())
+    # DenseLayer1
+    model.add(Dense(1024, activation='relu'))
+    # DenseLayer2
+    model.add(Dense(num_classes, activation='relu'))
+    # Softmax Layer
+    model.add(Activation('softmax'))
+
+
+    # Configures model for training
+    model.compile(loss=keras.losses.categorical_crossentropy,
+                                optimizer=keras.optimizers.Adadelta(),
+                                metrics=['accuracy'])
+
+    # Training: 5 epochs, validated against the test split
+    model.fit(x_train, y_train,
+                        batch_size=batch_size,
+                        epochs=5,
+                        verbose=1,
+                        validation_data=(x_test, y_test))
+
+
+    # Inference
+    score = model.evaluate(x_test, y_test, verbose=0)
+    print('Test loss:', score[0])
+    print('Test accuracy:', score[1])
+
+
+    # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src
+    # The integer labels (test_labels) are passed, not the one-hot y_test.
+    translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10)
+
diff --git a/hpvm/projects/onnx/src/keras_models/mnist.ipynb b/hpvm/projects/onnx/src/keras_models/mnist.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..1964ebe3991a1eb78760ea6a1739eacfec47e666
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/mnist.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import numpy as np\n",
+    "import onnx\n",
+    "import glob\n",
+    "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n",
+    "\n",
+    "from onnx import numpy_helper\n",
+    "\n",
+    "# onnx2hpvm modules\n",
+    "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = onnx.load('../models/mnist/mnist.onnx')\n",
+    "test_data_dir = '../models/mnist/test_data_set_0'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load inputs\n",
+    "inputs = []\n",
+    "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n",
+    "print(inputs_num)\n",
+    "for i in range(inputs_num):\n",
+    "    input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n",
+    "    tensor = onnx.TensorProto()\n",
+    "    with open(input_file, 'rb') as f:\n",
+    "        tensor.ParseFromString(f.read())\n",
+    "    inputs.append(numpy_helper.to_array(tensor))\n",
+    "\n",
+    "# Load reference outputs\n",
+    "ref_outputs = []\n",
+    "ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n",
+    "for i in range(ref_outputs_num):\n",
+    "    output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n",
+    "    tensor = onnx.TensorProto()\n",
+    "    with open(output_file, 'rb') as f:\n",
+    "        tensor.ParseFromString(f.read())\n",
+    "    ref_outputs.append(numpy_helper.to_array(tensor))\n",
+    "\n",
+    "# Run the model on the backend\n",
+    "outputs = list(backend.run_model(model, inputs))\n",
+    "\n",
+    "#from_onnx_to_hpvm(model)\n",
+    "# Compare the results with reference outputs.\n",
+    "#for ref_o, o in zip(ref_outputs, outputs):\n",
+    "#    np.testing.assert_almost_equal(ref_o, o)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..b739ed819634f30f4b33173443ac41f848f9c8f1
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py
@@ -0,0 +1,161 @@
+
+import sys
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+from keras.models import Sequential
+from keras.layers import *
+from keras.datasets import cifar10
+from keras.utils import to_categorical
+from keras.callbacks import *
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Model
+from keras import optimizers
+import keras.backend as K
+from frontend.approxhpvm_translator import translate_to_approxhpvm
+
+
+K.set_image_data_format('channels_first')
+
+(X_train, y_train), (X_test, y_test) = cifar10.load_data()
+test_labels = y_test
+
+print ("X_train.shape = ", X_train.shape)
+print ("X_test.shape = ", X_test.shape)
+
+
+X_train = X_train.astype('float32')
+X_test = X_test.astype('float32')
+
+
+mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)
+std = np.std(X_train, axis=(0, 2, 3), keepdims=True)
+
+X_train = (X_train - mean) / (std + 1e-9)
+X_test = (X_test - mean) / (std + 1e-9)
+
+y_train = to_categorical(y_train, num_classes=10)
+y_test = to_categorical(y_test, num_classes=10)
+
+
+def get_mobilenet(alpha=1, depth_multiplier=1):
+    model = Sequential()
+    # Builds a Sequential CNN for (3, 32, 32) inputs ending in a 10-way softmax.
+    def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):
+        channel_axis = 1
+        # NOTE(review): alpha is accepted but not applied to `filters` here.
+        model.add(Conv2D(filters, kernel,
+                          padding='same',
+                          use_bias=False,
+                          strides=strides, 
+                          input_shape=(3, 32, 32)))
+        model.add(BatchNormalization(axis=channel_axis))
+        model.add(Activation('relu'))
+    
+    def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):
+        channel_axis = 1 
+        # NOTE(review): alpha and depth_multiplier are accepted but unused below.
+        model.add(ZeroPadding2D(padding = ((1,1), (1,1) )))
+        # 'valid' 3x3 depthwise conv on the explicitly zero-padded input.
+        model.add(DepthwiseConv2D((3, 3),
+                                   padding='valid',
+                                   #depth_multiplier=depth_multiplier,
+                                   strides=strides,
+                                   use_bias=False))    
+        model.add(BatchNormalization(axis=channel_axis))
+        
+        model.add(Activation('relu'))
+        model.add(Conv2D(pointwise_conv_filters, (1, 1),
+                          padding='same',
+                          use_bias=False,
+                          strides=(1, 1)))
+        model.add(BatchNormalization(axis=channel_axis))
+        model.add(Activation('relu'))
+
+    # Layer stack: one standard conv block, then depthwise blocks with
+    # Dropout(0.5) after each group; some blocks use strides=(2, 2).
+    _conv_block(32, alpha, strides=(1, 1))
+    
+    _depthwise_conv_block(64, alpha, depth_multiplier)
+    
+    _depthwise_conv_block(128, alpha, depth_multiplier,
+                              strides=(2, 2))
+    _depthwise_conv_block(128, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+
+    _depthwise_conv_block(256, alpha, depth_multiplier, 
+                      strides=(2, 2))
+    _depthwise_conv_block(256, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+
+    _depthwise_conv_block(512, alpha, depth_multiplier,
+                      strides=(2, 2))
+    _depthwise_conv_block(512, alpha, depth_multiplier)
+    _depthwise_conv_block(512, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+    
+    _depthwise_conv_block(512, alpha, depth_multiplier)
+    _depthwise_conv_block(512, alpha, depth_multiplier)
+    _depthwise_conv_block(512, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+    
+    _depthwise_conv_block(1024, alpha, depth_multiplier,
+                         strides=(2, 2))
+    _depthwise_conv_block(1024, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+    # Classifier head: 2x2 average pool, flatten, 10-way softmax.
+    model.add(AveragePooling2D(pool_size=2))
+    model.add(Flatten())
+    model.add(Dense(10, activation='softmax'))
+
+    return model
+    
+    
+# data augmentation, horizontal flips only
+datagen = ImageDataGenerator(
+        featurewise_center=False,
+        featurewise_std_normalization=False,
+        rotation_range=0.0,
+        width_shift_range=0.0,
+        height_shift_range=0.0,
+        vertical_flip=False,
+        horizontal_flip=True)
+datagen.fit(X_train)
+
+
+model = get_mobilenet()
+
+learning_rates=[]
+for i in range(5):
+    learning_rates.append(2e-2)
+for i in range(50-5):
+    learning_rates.append(1e-2)
+for i in range(100-50):
+    learning_rates.append(8e-3)
+for i in range(150-100):
+    learning_rates.append(4e-3)
+for i in range(200-150):
+    learning_rates.append(2e-3)
+for i in range(300-200):
+    learning_rates.append(1e-3)
+
+callbacks = [
+    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+]
+
+model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
+                       loss='categorical_crossentropy', 
+                       metrics=['accuracy'])
+
+model.fit_generator(
+    datagen.flow(X_train, y_train, batch_size=128),
+    steps_per_epoch=int(np.ceil(50000 / 128)),
+    validation_data=(X_test, y_test),
+    #epochs=300,
+    epochs=50,
+    callbacks=callbacks
+)
+
+model.summary()
+
+translate_to_approxhpvm(model, "data/mobilenet_hpvm/", X_test, test_labels, 10)
+
diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py b/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py
new file mode 100644
index 0000000000000000000000000000000000000000..64df7f98174f22a59f3382ed4337d23e29900051
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py
@@ -0,0 +1,158 @@
+import sys
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from keras.models import Sequential
+from keras.layers import *
+from keras.datasets import cifar10
+from keras.utils import to_categorical
+from keras.callbacks import *
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Model
+from keras import optimizers
+import keras.backend as K
+from frontend.approxhpvm_translator import translate_to_approxhpvm
+
+
+K.set_image_data_format('channels_first')
+
+(X_train, y_train), (X_test, y_test) = cifar10.load_data()
+test_labels = y_test
+
+print ("X_train.shape = ", X_train.shape)
+print ("X_test.shape = ", X_test.shape)
+
+
+X_train = X_train.astype('float32')
+X_test = X_test.astype('float32')
+
+
+mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)
+std = np.std(X_train, axis=(0, 2, 3), keepdims=True)
+
+X_train = (X_train - mean) / (std + 1e-9)
+X_test = (X_test - mean) / (std + 1e-9)
+
+y_train = to_categorical(y_train, num_classes=10)
+y_test = to_categorical(y_test, num_classes=10)
+
+
+def get_mobilenet(alpha=1, depth_multiplier=1):
+    model = Sequential()
+    # Builds a Sequential CNN for (3, 32, 32) inputs ending in a 10-way softmax.
+    def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):
+        channel_axis = 1
+        filters = int(filters * alpha)
+        model.add(Conv2D(filters, kernel,
+                          padding='same',
+                          use_bias=False,
+                          strides=strides, 
+                        input_shape=(3, 32, 32)))
+        model.add(BatchNormalization(axis=channel_axis))
+        model.add(Activation('relu'))
+    
+    def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):
+        channel_axis = 1
+        pointwise_conv_filters = int(pointwise_conv_filters * alpha)
+        # 3x3 depthwise conv -> BN -> ReLU, then 1x1 pointwise conv -> BN -> ReLU.
+        model.add(DepthwiseConv2D((3, 3),
+                                   padding='same',
+                                   depth_multiplier=depth_multiplier,
+                                   strides=strides,
+                                   use_bias=False))    
+        model.add(BatchNormalization(axis=channel_axis))
+        model.add(Activation('relu'))
+        model.add(Conv2D(pointwise_conv_filters, (1, 1),
+                          padding='same',
+                          use_bias=False,
+                          strides=(1, 1)))
+        model.add(BatchNormalization(axis=channel_axis))
+        model.add(Activation('relu'))
+    # Shallow variant: the trailing 512/1024-filter blocks further down are
+    # commented out, so the stack ends after the first 512-filter block.
+    _conv_block(32, alpha, strides=(1, 1))
+    
+    _depthwise_conv_block(64, alpha, depth_multiplier)
+    
+    _depthwise_conv_block(128, alpha, depth_multiplier,
+                              strides=(2, 2))
+    _depthwise_conv_block(128, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+
+    _depthwise_conv_block(256, alpha, depth_multiplier, 
+                      strides=(2, 2))
+    _depthwise_conv_block(256, alpha, depth_multiplier)
+    model.add(Dropout(rate=0.5))
+
+    _depthwise_conv_block(512, alpha, depth_multiplier,
+                      strides=(2, 2))
+#     _depthwise_conv_block(512, alpha, depth_multiplier)
+#     _depthwise_conv_block(512, alpha, depth_multiplier)
+#     model.add(Dropout(rate=0.5))
+    
+#     _depthwise_conv_block(512, alpha, depth_multiplier)
+#     _depthwise_conv_block(512, alpha, depth_multiplier)
+#     _depthwise_conv_block(512, alpha, depth_multiplier)
+#     model.add(Dropout(rate=0.5))
+    
+#     _depthwise_conv_block(1024, alpha, depth_multiplier,
+#                          strides=(2, 2))
+#     _depthwise_conv_block(1024, alpha, depth_multiplier)
+#     model.add(Dropout(rate=0.5))
+    # Classifier head: 2x2 average pool, flatten, 10-way softmax.
+    model.add(AveragePooling2D(pool_size=2))
+    model.add(Flatten())
+    model.add(Dense(10, activation='softmax'))
+
+    return model
+    
+    
+# data augmentation, horizontal flips only
+datagen = ImageDataGenerator(
+        featurewise_center=False,
+        featurewise_std_normalization=False,
+        rotation_range=0.0,
+        width_shift_range=0.2,
+        height_shift_range=0.2,
+        vertical_flip=False,
+        horizontal_flip=True)
+datagen.fit(X_train)
+
+
+model = get_mobilenet()
+
+learning_rates=[]
+for i in range(5):
+    learning_rates.append(5e-2)
+for i in range(50-5):
+    learning_rates.append(2e-2)
+for i in range(100-50):
+    learning_rates.append(8e-3)
+for i in range(150-100):
+    learning_rates.append(4e-3)
+for i in range(200-150):
+    learning_rates.append(2e-3)
+for i in range(250-200):
+    learning_rates.append(1e-3)
+
+callbacks = [
+    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+]
+
+model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
+                       loss='categorical_crossentropy', 
+                       metrics=['accuracy'])
+
+model.fit_generator(
+    datagen.flow(X_train, y_train, batch_size=128),
+    steps_per_epoch=int(np.ceil(50000 / 128)),
+    validation_data=(X_test, y_test),
+    #epochs=300,
+    epochs=250,
+    callbacks=callbacks
+)
+
+model.summary()
+
+translate_to_approxhpvm(model, "data/mobilenet_shallow/", X_test, test_labels, 10)
+
diff --git a/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fbed4623d0e57d7a0dd948fa0894127fea72324
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py
@@ -0,0 +1,176 @@
+import sys
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+from keras.models import Sequential
+from keras.layers import *
+from keras.datasets import cifar10
+from keras.utils import to_categorical
+from keras.callbacks import *
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Model
+from keras import optimizers
+import keras.backend as K
+
+
+K.set_image_data_format('channels_first')
+
+(X_train, y_train), (X_test, y_test) = cifar10.load_data()
+test_labels = y_test
+
+print ("X_train.shape = ", X_train.shape)
+print ("X_test.shape = ", X_test.shape)
+
+
+X_train = X_train.astype('float32')
+X_test = X_test.astype('float32')
+
+mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)  # per-channel stats: channels_first is set above, so axis 1 is kept
+std = np.std(X_train, axis=(0, 2, 3), keepdims=True)  # same reduction axes as the other MobileNet scripts in this patch
+X_train = (X_train - mean) / (std + 1e-9)  # 1e-9 guards against division by zero
+X_test = (X_test - mean) / (std + 1e-9)  # test data is scaled with the training-set statistics
+
+y_train = to_categorical(y_train, num_classes=10)
+y_test = to_categorical(y_test, num_classes=10)
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """Round `v` to the nearest multiple of `divisor`, never below `min_value` (default: `divisor`)."""
+    floor = divisor if min_value is None else min_value
+    rounded = max(floor, int(v + divisor / 2) // divisor * divisor)
+    # Step up by one divisor if rounding left the result below 90% of `v`.
+    if rounded < 0.9 * v:
+        rounded += divisor
+    return rounded
+
+def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
+    """Expand (skipped when block_id is falsy) -> 3x3 depthwise -> 1x1 project,
+    adding the input back when stride is 1 and the channel count is unchanged."""
+    bn_axis = 1
+    in_channels = inputs.shape[1]
+    pointwise_filters = _make_divisible(int(filters * alpha), 8)
+    x = inputs
+
+    # 1x1 expansion conv; not applied for block_id 0.
+    if block_id:
+        x = Conv2D(int(expansion * in_channels), kernel_size=1, strides=1, padding='valid', use_bias=False)(x)
+        x = BatchNormalization(axis=bn_axis)(x)
+        x = Activation('relu')(x)
+
+    # The same one-pixel zero pad is used for every stride, so the depthwise
+    # conv below can run with 'valid' padding.
+    x = ZeroPadding2D(padding=(1, 1))(x)
+    x = DepthwiseConv2D(kernel_size=3, strides=stride, use_bias=False, padding='valid')(x)
+    x = BatchNormalization(axis=bn_axis)(x)
+    x = Activation('relu')(x)
+
+    # 1x1 projection: batch norm only, no activation afterwards.
+    x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='valid', use_bias=False)(x)
+    x = BatchNormalization(axis=bn_axis)(x)
+
+    # Shortcut only for stride-1 blocks that keep the channel count.
+    if not (in_channels == pointwise_filters and stride == 1):
+        return x
+    return Add()([inputs, x])
+
+def get_mobilenetv2(alpha=1.0, depth_multiplier=1):
+    """Build the functional Model for (3, 32, 32) inputs with a 10-way softmax; `depth_multiplier` is unused."""
+    channel_axis = 1
+    
+    first_block_filters = _make_divisible(32 * alpha, 8)
+    img_input = Input(shape=(3, 32, 32))
+    # Stem: explicit zero pad + 'valid' 3x3 conv; the BN/ReLU after it are commented out.
+    x = ZeroPadding2D(padding=(1, 1))(img_input)
+    x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='valid', use_bias=False)(x)
+    #x = BatchNormalization(axis=channel_axis)(x)
+    #x = Activation('relu')(x)
+    # Inverted residual blocks; block_id=0 skips the expansion conv.
+    x = _inverted_res_block(x, filters=16,  alpha=alpha, stride=1, expansion=1, block_id=0 )
+
+    x = _inverted_res_block(x, filters=24,  alpha=alpha, stride=1, expansion=6, block_id=1 )
+    x = _inverted_res_block(x, filters=24,  alpha=alpha, stride=1, expansion=6, block_id=2 )
+
+    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=2, expansion=6, block_id=3 )
+    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=1, expansion=6, block_id=4 )
+    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=1, expansion=6, block_id=5 )
+
+    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=2, expansion=6, block_id=6 )
+    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=7 )
+    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=8 )
+    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=9 )
+    x = Dropout(rate=0.25)(x)
+
+    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=10)
+    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=11)
+    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=12)
+    x = Dropout(rate=0.25)(x)
+
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13)
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14)
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15)
+    x = Dropout(rate=0.25)(x)
+
+    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16)
+    x = Dropout(rate=0.25)(x)
+    # The final 1x1 conv width scales with alpha only when alpha > 1.0.
+    if alpha > 1.0:
+        last_block_filters = _make_divisible(1280 * alpha, 8)
+    else:
+        last_block_filters = 1280
+
+    x = Conv2D(last_block_filters, kernel_size=1, use_bias=False)(x)
+    x = BatchNormalization(axis=channel_axis)(x)
+    x = Activation('relu')(x)
+    # Classifier head: average pool (default pool size), flatten, 10-way softmax.
+    x = AveragePooling2D()(x)
+    x = Flatten()(x)
+    x = Dense(10, activation='softmax')(x)
+        
+    model = Model(inputs=img_input, outputs=x)
+    return model
+
+    
+# data augmentation, horizontal flips only
+datagen = ImageDataGenerator(
+        featurewise_center=False,
+        featurewise_std_normalization=False,
+        rotation_range=0.0,
+        width_shift_range=0.0,
+        height_shift_range=0.0,
+        vertical_flip=False,
+        horizontal_flip=True)
+datagen.fit(X_train)
+
+
+model = get_mobilenetv2()
+
+learning_rates=[]
+for i in range(5):
+    learning_rates.append(2e-2)
+for i in range(50-5):
+    learning_rates.append(1e-2)
+for i in range(100-50):
+    learning_rates.append(8e-3)
+for i in range(150-100):
+    learning_rates.append(4e-3)
+for i in range(200-150):
+    learning_rates.append(2e-3)
+for i in range(300-200):
+    learning_rates.append(1e-3)
+
+callbacks = [
+    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+]
+
+model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
+                       loss='categorical_crossentropy', 
+                       metrics=['accuracy'])
+
+model.fit_generator(
+    datagen.flow(X_train, y_train, batch_size=128),
+    steps_per_epoch=int(np.ceil(50000 / 128)),
+    validation_data=(X_test, y_test),
+    epochs=300,
+    callbacks=callbacks
+)
+
diff --git a/hpvm/projects/onnx/src/keras_models/resnet.py b/hpvm/projects/onnx/src/keras_models/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a9e092170c767d433e438b16fdc38bd9bb9d966
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/resnet.py
@@ -0,0 +1,576 @@
+"""
+#Trains a ResNet on the CIFAR10 dataset.
+
+ResNet v1:
+[Deep Residual Learning for Image Recognition
+](https://arxiv.org/pdf/1512.03385.pdf)
+
+ResNet v2:
+[Identity Mappings in Deep Residual Networks
+](https://arxiv.org/pdf/1603.05027.pdf)
+
+
+Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti
+:------------|--:|-------:|-----------------------:|---:
+ResNet20   v1|  3| 92.16 %|                 91.25 %|35
+ResNet32   v1|  5| 92.46 %|                 92.49 %|50
+ResNet44   v1|  7| 92.50 %|                 92.83 %|70
+ResNet56   v1|  9| 92.71 %|                 93.03 %|90
+ResNet110  v1| 18| 92.65 %|            93.39+-.16 %|165
+ResNet164  v1| 27|     - %|                 94.07 %|  -
+ResNet1001 v1|N/A|     - %|                 92.39 %|  -
+
+&nbsp;
+
+Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti
+:------------|--:|-------:|-----------------------:|---:
+ResNet20   v2|  2|     - %|                     - %|---
+ResNet32   v2|N/A| NA    %|            NA         %| NA
+ResNet44   v2|N/A| NA    %|            NA         %| NA
+ResNet56   v2|  6| 93.01 %|            NA         %|100
+ResNet110  v2| 12| 93.15 %|            93.63      %|180
+ResNet164  v2| 18|     - %|            94.54      %|  -
+ResNet1001 v2|111|     - %|            95.08+-.14 %|  -
+"""
+
+from __future__ import print_function
+import keras
+from keras.layers import Dense, Conv2D, BatchNormalization, Activation
+from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D
+from keras.optimizers import Adam
+from keras.callbacks import ModelCheckpoint, LearningRateScheduler
+from keras.callbacks import ReduceLROnPlateau
+from keras.preprocessing.image import ImageDataGenerator
+from keras.regularizers import l2
+from keras import backend as K
+from keras.models import Model
+from keras.datasets import cifar10
+from keras import backend as K
+import numpy as np
+import os
+import sys
+#from approxhpvm_translator import translate_to_approxhpvm
+#from weight_utils import dumpCalibrationData
+
+
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+K.set_image_data_format('channels_first')
+
+
+# Training parameters
+batch_size = 32  # orig paper trained all networks with batch_size=128
+#---- epochs = 200
+epochs = 2
+data_augmentation = True
+num_classes = 10
+
+# Subtracting pixel mean improves accuracy
+subtract_pixel_mean = True
+
+# Model parameter
+# ----------------------------------------------------------------------------
+#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
+# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
+#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
+# ----------------------------------------------------------------------------
+# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
+# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
+# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
+# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
+# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
+# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
+# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
+# ---------------------------------------------------------------------------
+n = 3
+
+# Model version
+# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
+version = 1
+
+# Computed depth from supplied model parameter n
+if version == 1:
+    depth = n * 6 + 2
+elif version == 2:
+    depth = n * 9 + 2
+
+# Model name, depth and version
+model_type = 'ResNet%dv%d' % (depth, version)
+
+# Load the CIFAR10 data.
+(x_train, y_train), (x_test, y_test) = cifar10.load_data()
+test_labels = y_test
+train_labels = y_train
+
+# Input image dimensions.
+input_shape = x_train.shape[1:]
+
+# Normalize data.
+x_train = x_train.astype('float32') / 255
+x_test = x_test.astype('float32') / 255
+
+# If subtract pixel mean is enabled
+if subtract_pixel_mean:
+    x_train_mean = np.mean(x_train, axis=0)
+    x_train -= x_train_mean
+    x_test -= x_train_mean
+
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+print('y_train shape:', y_train.shape)
+
+# Convert class vectors to binary class matrices.
+y_train = keras.utils.to_categorical(y_train, num_classes)
+y_test = keras.utils.to_categorical(y_test, num_classes)
+
+
+
+
+  
+
+def lr_schedule(epoch):
+    """Return the learning rate to use for the given epoch.
+
+    Starts from a base rate of 1e-3 and scales it down once the epoch index
+    exceeds 80, 120, 160 and 180. Prints the chosen rate before returning it.
+
+    # Arguments
+        epoch (int): epoch index to look up
+
+    # Returns
+        lr (float): learning rate for that epoch
+    """
+    # (threshold, scale) pairs, checked from the latest milestone backwards.
+    milestones = (
+        (180, 0.5e-3), (160, 1e-3), (120, 1e-2), (80, 1e-1),
+    )
+    lr = 1e-3
+    for threshold, scale in milestones:
+        if epoch > threshold:
+            lr *= scale
+            break
+    print('Learning rate: ', lr)
+    return lr
+
+
+def resnet_layer(inputs,
+                 num_filters=16,
+                 kernel_size=3,
+                 strides=1,
+                 activation='relu',
+                 batch_normalization=True,
+                 conv_first=True):
+    """Zero-pad, then apply a 2D Convolution-(Batch Normalization)-Activation stack
+
+    # Arguments
+        inputs (tensor): input tensor from input image or previous layer
+        num_filters (int): Conv2D number of filters
+        kernel_size (int): Conv2D square kernel dimensions
+        strides (int): Conv2D square stride dimensions
+        activation (string): activation name, or None to skip the activation
+        batch_normalization (bool): currently ignored; forced to False below
+        conv_first (bool): conv-bn-activation (True) or
+            bn-activation-conv (False)
+
+    # Returns
+        x (tensor): tensor as input to the next layer
+    """
+    conv = Conv2D(num_filters,
+                  kernel_size=kernel_size,
+                  strides=strides,
+                  padding='valid', # NOTE: using valid convs with explicit pad operation
+                  kernel_initializer='he_normal',
+                  kernel_regularizer=l2(1e-4))
+    # Pad (kernel_size - 1) / 2 pixels per side: 1 for a 3x3 kernel, 0 for 1x1.
+    padding_value = int((kernel_size - 1) / 2)
+    zero_padding = ZeroPadding2D(padding = (padding_value, padding_value))
+
+    # FIXME: Temporarily disabled batch normalization
+    batch_normalization = False
+    # The zero padding is applied first in both orderings below.
+    x = inputs
+    x = zero_padding(x)
+    if conv_first:
+        x = conv(x)
+        if batch_normalization:
+            x = BatchNormalization()(x)
+        if activation is not None:
+            x = Activation(activation)(x)
+    else:
+        if batch_normalization:
+            x = BatchNormalization()(x)
+        if activation is not None:
+            x = Activation(activation)(x)
+        x = conv(x)
+    return x
+
+
+
+def resnet_v0(input_shape, depth, num_classes=10):
+    """Reduced ResNet v1-style model builder (constant width, dense head)
+
+    Stacks of 2 x (3 x 3) resnet_layer units.
+    Last ReLU is after the shortcut connection.
+    At the beginning of stages 1 and 2 the first layer uses strides=2 and the
+    shortcut becomes a 1 x 1 resnet_layer with the same stride. Unlike the
+    ResNet v1 of [a], the number of filters is not doubled between stages
+    (num_filters is multiplied by 1), so every stage uses 16 filters.
+    Filters per stage:
+    stage 0: 16
+    stage 1: 16
+    stage 2: 16
+    Classifier head (differs from [a]):
+    the average pooling is commented out, so the last feature map is
+    flattened directly and fed to Dense(64) followed by a softmax
+    Dense(num_classes).
+    NOTE(review): the parameter counts of Table 6 of [a] presumably do not
+    apply to this variant -- confirm before quoting them.
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers (must be 6n+2)
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 6 != 0:
+        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+    # Start model definition.
+    num_filters = 16
+    num_res_blocks = int((depth - 2) / 6)
+
+    inputs = Input(shape=input_shape)
+    x = resnet_layer(inputs=inputs)
+    # Instantiate the stack of residual units
+    for stack in range(3):
+        for res_block in range(num_res_blocks):
+            strides = 1
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                strides = 2  # downsample
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters,
+                             strides=strides)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters,
+                             activation=None)
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                # linear projection residual shortcut connection to match
+                # changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = keras.layers.add([x, y])
+            x = Activation('relu')(x)
+        num_filters *= 1  # no-op: the filter count stays at 16 for every stage
+
+    # Add classifier on top.
+    # v1 does not use BN after last shortcut connection-ReLU
+    #-- x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    x = Dense(64)(y)
+    outputs = Dense(num_classes,
+                    activation='softmax',
+                    kernel_initializer='he_normal')(x)
+
+    # Instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+def resnet_v1_1(input_shape, depth, num_classes=10):
+    """ResNet Version 1 Model builder [a]
+
+    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+    Last ReLU is after the shortcut connection.
+    At the beginning of each stage, the feature map size is halved (downsampled)
+    by a convolutional layer with strides=2, while the number of filters is
+    doubled. Within each stage, the layers have the same number of filters and
+    the same feature map size.
+    Features maps sizes:
+    stage 0: 32x32, 16
+    stage 1: 16x16, 32
+    stage 2:  8x8,  64
+    The Number of parameters is approx the same as Table 6 of [a]:
+    ResNet20 0.27M
+    ResNet32 0.46M
+    ResNet44 0.66M
+    ResNet56 0.85M
+    ResNet110 1.7M
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers; ValueError unless 6n+2
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 6 != 0:
+        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+    # Start model definition.
+    num_filters = 16
+    num_res_blocks = int((depth - 2) / 6)
+
+    inputs = Input(shape=input_shape)
+    x = resnet_layer(inputs=inputs)
+    # Instantiate the stack of residual units
+    for stack in range(3):
+        for res_block in range(num_res_blocks):
+            strides = 1
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                strides = 2  # downsample
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters,
+                             strides=strides)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters,
+                             activation=None)
+            if stack > 0 and res_block == 0:  # first layer but not first stack
+                # linear projection residual shortcut connection to match
+                # changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = keras.layers.add([x, y])
+            x = Activation('relu')(x)
+        num_filters *= 2
+
+    # Classifier head: 8x8 average pool, flatten, Dense without activation.
+    x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    outputs = Dense(num_classes,
+                    #activation='softmax',
+                    kernel_initializer='he_normal')(y)
+    # Softmax is applied as its own Activation layer rather than inside Dense.
+    outputs = Activation('softmax')(outputs)
+
+
+    # Instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+
+def resnet_v2(input_shape, depth, num_classes=10):
+    """ResNet Version 2 Model builder [b]
+
+    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D, also known as
+    bottleneck layers.
+    The first shortcut connection of each stage is a 1 x 1 Conv2D.
+    The second and later shortcut connections are identity.
+    At the beginning of each stage after the first, the feature map size is
+    halved (downsampled) by a convolutional layer with strides=2, while the
+    number of filter maps is doubled. Within each stage, the layers have the
+    same number of filters and the same feature map sizes.
+    Feature map sizes:
+    conv1  : 32x32,  16
+    stage 0: 32x32,  64
+    stage 1: 16x16, 128
+    stage 2:  8x8,  256
+
+    depth=2 yields zero residual units per stage; the model is then the
+    input convolution followed directly by the classifier.
+
+    # Arguments
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers, must be 9n+2
+        num_classes (int): number of classes (CIFAR10 has 10)
+
+    # Returns
+        model (Model): Keras model instance
+
+    # Raises
+        ValueError: if depth is not of the form 9n+2
+    """
+    if (depth - 2) % 9 != 0:
+        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
+    # Start model definition.
+    num_filters_in = 16
+    num_res_blocks = int((depth - 2) / 9)
+
+    inputs = Input(shape=input_shape)
+    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
+    x = resnet_layer(inputs=inputs,
+                     num_filters=num_filters_in,
+                     conv_first=True)
+
+    # Instantiate the stack of residual units
+    for stage in range(3):
+        # The output width depends only on the stage, so it is computed once
+        # per stage. This also keeps it defined when num_res_blocks is 0.
+        if stage == 0:
+            num_filters_out = num_filters_in * 4
+        else:
+            num_filters_out = num_filters_in * 2
+
+        for res_block in range(num_res_blocks):
+            first_block = res_block == 0
+
+            # The very first unit of the network skips BN-ReLU on its first
+            # layer; every other unit uses both.
+            if stage == 0 and first_block:
+                activation = None
+                batch_normalization = False
+            else:
+                activation = 'relu'
+                batch_normalization = True
+
+            # Downsample in the first unit of every stage except stage 0.
+            strides = 2 if (stage > 0 and first_block) else 1
+
+            # bottleneck residual unit
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters_in,
+                             kernel_size=1,
+                             strides=strides,
+                             activation=activation,
+                             batch_normalization=batch_normalization,
+                             conv_first=False)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters_in,
+                             conv_first=False)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters_out,
+                             kernel_size=1,
+                             conv_first=False)
+            if first_block:
+                # linear projection residual shortcut connection to match
+                # changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters_out,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = keras.layers.add([x, y])
+
+        # The next stage's bottleneck width is this stage's output width.
+        num_filters_in = num_filters_out
+
+    # Add classifier on top.
+    # v2 has BN-ReLU before Pooling
+    x = BatchNormalization()(x)
+    x = Activation('relu')(x)
+    x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    outputs = Dense(num_classes,
+                    activation='softmax',
+                    kernel_initializer='he_normal')(y)
+
+    # Instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+
+# ---------------------------------------------------------------------------
+# Script body: build a depth-20 ResNet, train it, score it on the test set
+# and export the trained model to ONNX. It relies on names defined earlier in
+# this file (version, input_shape, lr_schedule, model_type, batch_size,
+# epochs, data_augmentation and the x_/y_ train/test arrays).
+# ---------------------------------------------------------------------------
+depth = 20
+
+if version == 2:
+    model = resnet_v2(input_shape=input_shape, depth=depth)
+else:
+    model = resnet_v1_1(input_shape=input_shape, depth=depth)
+
+
+# The initial learning rate is whatever the schedule returns for epoch 0.
+model.compile(loss='categorical_crossentropy',
+              optimizer=Adam(lr=lr_schedule(0)),
+              metrics=['accuracy'])
+model.summary()
+print(model_type)
+
+# Prepare model saving directory.
+save_dir = os.path.join(os.getcwd(), 'saved_models')
+# '{epoch:03d}' is left unformatted here; ModelCheckpoint fills it in.
+model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
+if not os.path.isdir(save_dir):
+    os.makedirs(save_dir)
+filepath = os.path.join(save_dir, model_name)
+
+# Prepare callbacks for model saving and for learning rate adjustment.
+checkpoint = ModelCheckpoint(filepath=filepath,
+                             monitor='val_acc',
+                             verbose=1,
+                             save_best_only=True)
+
+lr_scheduler = LearningRateScheduler(lr_schedule)
+
+lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
+                               cooldown=0,
+                               patience=5,
+                               min_lr=0.5e-6)
+
+callbacks = [checkpoint, lr_reducer, lr_scheduler]
+
+# Run training, with or without data augmentation.
+if not data_augmentation:
+    print('Not using data augmentation.')
+    model.fit(x_train, y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              validation_data=(x_test, y_test),
+              shuffle=True,
+              callbacks=callbacks)
+else:
+    print('Using real-time data augmentation.')
+    # This will do preprocessing and realtime data augmentation:
+    datagen = ImageDataGenerator(
+        # set input mean to 0 over the dataset
+        featurewise_center=False,
+        # set each sample mean to 0
+        samplewise_center=False,
+        # divide inputs by std of dataset
+        featurewise_std_normalization=False,
+        # divide each input by its std
+        samplewise_std_normalization=False,
+        # apply ZCA whitening
+        zca_whitening=False,
+        # epsilon for ZCA whitening
+        zca_epsilon=1e-06,
+        # randomly rotate images in the range (deg 0 to 180)
+        rotation_range=0,
+        # randomly shift images horizontally
+        width_shift_range=0.1,
+        # randomly shift images vertically
+        height_shift_range=0.1,
+        # set range for random shear
+        shear_range=0.,
+        # set range for random zoom
+        zoom_range=0.,
+        # set range for random channel shifts
+        channel_shift_range=0.,
+        # set mode for filling points outside the input boundaries
+        fill_mode='nearest',
+        # value used for fill_mode = "constant"
+        cval=0.,
+        # randomly flip images horizontally
+        horizontal_flip=True,
+        # randomly flip images vertically
+        vertical_flip=False,
+        # set rescaling factor (applied before any other transformation)
+        rescale=None,
+        # set function that will be applied on each input
+        preprocessing_function=None,
+        # image data format, either "channels_first" or "channels_last"
+        data_format="channels_first",
+        # fraction of images reserved for validation (strictly between 0 and 1)
+        validation_split=0.0)
+
+    # Compute quantities required for featurewise normalization
+    # (std, mean, and principal components if ZCA whitening is applied).
+    datagen.fit(x_train)
+
+    # Fit the model on the batches generated by datagen.flow().
+    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
+                        validation_data=(x_test, y_test),
+                        epochs=epochs, verbose=1, workers=4,
+                        callbacks=callbacks)
+
+# Score trained model.
+scores = model.evaluate(x_test, y_test, verbose=1)
+print('Test loss:', scores[0])
+print('Test accuracy:', scores[1])
+
+
+# Disabled ApproxHPVM export / calibration-dump paths, kept for reference.
+#dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train,
+#                        "calibration_data/resnet18_train_labels.bin", train_labels)
+#sys.exit(0)
+
+#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels)
+
+#translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test)
+
+# Export the trained Keras model to ONNX.
+import keras2onnx
+onnx_model = keras2onnx.convert_keras(model, model.name)
+import onnx
+# The path is relative to the current working directory. Create the target
+# directory first so a missing folder cannot fail the export after training.
+onnx_path = "../models/keras/resnet.onnx"
+onnx_dir = os.path.dirname(onnx_path)
+if not os.path.isdir(onnx_dir):
+    os.makedirs(onnx_dir)
+onnx.save(onnx_model, onnx_path)
diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..64e5d36e78fd728381dc864bf965ff68c4e7cf16
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py
@@ -0,0 +1,241 @@
+
+
+from __future__ import print_function
+import keras
+from keras.datasets import cifar10
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation, Flatten
+from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
+from keras import optimizers
+import numpy as np
+from keras.layers.core import Lambda
+from keras import backend as K
+from keras import regularizers
+import os
+import sys
+from frontend.approxhpvm_translator import translate_to_approxhpvm
+from frontend.weight_utils import dumpCalibrationData
+
+
+
+class cifar10vgg:
+    """VGG16-style CIFAR-10 classifier built with Keras.
+
+    Constructing an instance builds the network and then either trains it
+    from scratch (train=True) or loads weights from 'cifar10vgg.h5'.
+    """
+
+    def __init__(self, train=True):
+        self.num_classes = 10
+        self.weight_decay = 0.0005
+        # Channels-first input: 3 x 32 x 32.
+        self.x_shape = [3, 32, 32]
+
+        self.model = self.build_model()
+        if not train:
+            self.model.load_weights('cifar10vgg.h5')
+        else:
+            self.model = self.train(self.model)
+
+
+    def build_model(self):
+        """Return the uncompiled Sequential VGG network.
+
+        Thirteen 3x3 'same'-padded convolutions, each followed by ReLU and
+        then by either Dropout or 2x2 max pooling, feed a 512-unit dense
+        layer and a softmax classifier. Every convolution and the first
+        dense layer carry an L2 kernel regularizer of self.weight_decay.
+        The network contains no BatchNormalization layers.
+        """
+        weight_decay = self.weight_decay
+
+        # (filters, dropout rate) per convolution; a rate of None means the
+        # convolution is followed by 2x2 max pooling instead of dropout.
+        conv_plan = [
+            (64, 0.3), (64, None),
+            (128, 0.4), (128, None),
+            (256, 0.4), (256, 0.4), (256, None),
+            (512, 0.4), (512, 0.4), (512, None),
+            (512, 0.4), (512, 0.4), (512, None),
+        ]
+
+        model = Sequential()
+        for position, (filters, drop_rate) in enumerate(conv_plan):
+            conv_kwargs = {'padding': 'same'}
+            if position == 0:
+                # Only the first layer declares the input shape.
+                conv_kwargs['input_shape'] = self.x_shape
+            conv_kwargs['kernel_regularizer'] = regularizers.l2(weight_decay)
+
+            model.add(Conv2D(filters, (3, 3), **conv_kwargs))
+            model.add(Activation('relu'))
+            if drop_rate is None:
+                model.add(MaxPooling2D(pool_size=(2, 2)))
+            else:
+                model.add(Dropout(drop_rate))
+
+        # Classifier head.
+        model.add(Dropout(0.5))
+        model.add(Flatten())
+        model.add(Dense(512, kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.5))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        return model
+
+
+    def normalize(self, X_train, X_test):
+        """Standardize both sets with the training set's statistics.
+
+        A single scalar mean and standard deviation are computed over all
+        four axes of X_train and applied to X_train and X_test alike.
+
+        Returns the tuple (normalized X_train, normalized X_test).
+        """
+        every_axis = (0, 1, 2, 3)
+        center = np.mean(X_train, axis=every_axis)
+        spread = np.std(X_train, axis=every_axis)
+        # The small epsilon guards against division by zero.
+        scale = spread + 1e-7
+        return (X_train - center) / scale, (X_test - center) / scale
+
+
+    def normalize_production(self, x):
+        """Standardize x with fixed, previously recorded statistics.
+
+        The constants were produced during the first training run and are
+        meant for the standard CIFAR-10 training set normalization.
+        """
+        saved_mean = 120.707
+        saved_std = 64.15
+        return (x - saved_mean) / (saved_std + 1e-7)
+
+
+    def predict(self, x, normalize=True, batch_size=50):
+        """Run the model on x, normalizing it first unless told otherwise."""
+        inputs = self.normalize_production(x) if normalize else x
+        return self.model.predict(inputs, batch_size)
+
+
+    def train(self, model):
+        """Train model on CIFAR-10, save its weights and return it.
+
+        Uses SGD with Nesterov momentum on augmented batches for 30 epochs;
+        the learning rate halves every 20 epochs. Weights are written to
+        'cifar10vgg.h5'.
+        """
+        # Training parameters.
+        batch_size = 128
+        num_epochs = 30
+        base_lr = 0.01
+        lr_decay = 1e-6
+        halve_every = 20
+
+        # The data, shuffled and split between train and test sets.
+        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+        x_train, x_test = self.normalize(x_train.astype('float32'),
+                                         x_test.astype('float32'))
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+        def step_decay(epoch):
+            return base_lr * (0.5 ** (epoch // halve_every))
+
+        lr_callback = keras.callbacks.LearningRateScheduler(step_decay)
+
+        # Data augmentation: small rotations, shifts and horizontal flips.
+        augmenter = ImageDataGenerator(
+            featurewise_center=False,
+            samplewise_center=False,
+            featurewise_std_normalization=False,
+            samplewise_std_normalization=False,
+            zca_whitening=False,
+            rotation_range=15,
+            width_shift_range=0.1,
+            height_shift_range=0.1,
+            horizontal_flip=True,
+            vertical_flip=False)
+        augmenter.fit(x_train)
+
+        # Optimization details.
+        optimizer = optimizers.SGD(lr=base_lr, decay=lr_decay,
+                                   momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy',
+                      optimizer=optimizer,
+                      metrics=['accuracy'])
+
+        batches = augmenter.flow(x_train, y_train, batch_size=batch_size)
+        model.fit_generator(batches,
+                            steps_per_epoch=x_train.shape[0] // batch_size,
+                            epochs=num_epochs,
+                            validation_data=(x_test, y_test),
+                            callbacks=[lr_callback],
+                            verbose=2)
+
+        model.save_weights('cifar10vgg.h5')
+        return model
+
+
+    
+if __name__ == '__main__':
+    # Script entry point: train the CIFAR-10 VGG model, pass it to
+    # translate_to_approxhpvm, and print the 0/1 loss on the test set.
+
+    # Switch Keras to channels-first tensors before the model is built
+    # (the model's x_shape is [3, 32, 32]).
+    K.set_image_data_format('channels_first')
+
+    # Expose only the CUDA device with index 1 to this process.
+    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+    
+    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    # Keep the integer labels; y_train / y_test become one-hot below.
+    test_labels = y_test
+    train_labels = y_train
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+
+    y_train = keras.utils.to_categorical(y_train, 10)
+    y_test = keras.utils.to_categorical(y_test, 10)
+
+    # The default train=True builds and trains the network in the constructor.
+    model = cifar10vgg()
+
+    # predict() normalizes x_test with the fixed production constants.
+    predicted_x = model.predict(x_test)
+
+    # Normalized copy of the test set, passed to the translator below.
+    norm_test = model.normalize_production(x_test)
+
+    # Normalizing train data before dumping
+    #x_train, x_test = model.normalize(x_train, x_test)
+    x_train = model.normalize_production(x_train)
+    
+    # dumpCalibrationData("vgg16_cifar_calib.bin", x_train, "vgg16_train_labels.bin", train_labels)
+    
+    # NOTE(review): the meaning of the trailing arguments (output directory,
+    # data, labels, 10) is defined by translate_to_approxhpvm, which is not
+    # visible here; confirm against its signature.
+    translate_to_approxhpvm(model.model, "data/vgg16_cifar10/", norm_test, test_labels, 10)
+
+    # True wherever the predicted class differs from the true class.
+    residuals = np.argmax(predicted_x,1)!=np.argmax(y_test,1)
+
+    # Fraction of misclassified test samples.
+    loss = sum(residuals)/len(residuals)
+    print("the validation 0/1 loss is: ",loss)
+
+
+
diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py
new file mode 100644
index 0000000000000000000000000000000000000000..66fe6be669f984b92c8c602332c29e09968e9c8a
--- /dev/null
+++ b/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py
@@ -0,0 +1,243 @@
+
+from __future__ import print_function
+import os
+import keras
+from keras.datasets import cifar100
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras import optimizers
+import numpy as np
+from keras.layers.core import Lambda
+from keras import backend as K
+from keras import regularizers
+from approxhpvm_translator import translate_to_approxhpvm
+import sys
+from weight_utils import dumpCalibrationData
+
+
+
+class cifar100vgg:
+    """VGG16-style CIFAR-100 classifier built with Keras.
+
+    Constructing an instance builds the network and then either trains it
+    from scratch (train=True) or loads weights from 'cifar100vgg.h5'.
+    """
+
+    def __init__(self, train=True):
+        self.num_classes = 100
+        self.weight_decay = 0.0005
+        # Channels-first input: 3 x 32 x 32.
+        self.x_shape = [3, 32, 32]
+
+        self.model = self.build_model()
+        if not train:
+            self.model.load_weights('cifar100vgg.h5')
+        else:
+            self.model = self.train(self.model)
+
+
+    def build_model(self):
+        """Return the uncompiled Sequential VGG network for 100 classes.
+
+        Thirteen 3x3 'same'-padded convolutions, each followed by ReLU and
+        then by either Dropout or 2x2 max pooling, feed a 512-unit dense
+        layer and a softmax classifier. Every convolution and the first
+        dense layer carry an L2 kernel regularizer of self.weight_decay.
+        The network contains no BatchNormalization layers.
+        """
+        weight_decay = self.weight_decay
+
+        # (filters, dropout rate) per convolution; a rate of None means the
+        # convolution is followed by 2x2 max pooling instead of dropout.
+        conv_plan = [
+            (64, 0.3), (64, None),
+            (128, 0.4), (128, None),
+            (256, 0.4), (256, 0.4), (256, None),
+            (512, 0.4), (512, 0.4), (512, None),
+            (512, 0.4), (512, 0.4), (512, None),
+        ]
+
+        model = Sequential()
+        for position, (filters, drop_rate) in enumerate(conv_plan):
+            conv_kwargs = {'padding': 'same'}
+            if position == 0:
+                # Only the first layer declares the input shape.
+                conv_kwargs['input_shape'] = self.x_shape
+            conv_kwargs['kernel_regularizer'] = regularizers.l2(weight_decay)
+
+            model.add(Conv2D(filters, (3, 3), **conv_kwargs))
+            model.add(Activation('relu'))
+            if drop_rate is None:
+                model.add(MaxPooling2D(pool_size=(2, 2)))
+            else:
+                model.add(Dropout(drop_rate))
+
+        # Classifier head.
+        model.add(Dropout(0.5))
+        model.add(Flatten())
+        model.add(Dense(512, kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.5))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        return model
+
+
+    def normalize(self, X_train, X_test):
+        """Standardize both sets with the training set's statistics.
+
+        A single scalar mean and standard deviation are computed over all
+        four axes of X_train, printed, and applied to X_train and X_test.
+
+        Returns the tuple (normalized X_train, normalized X_test).
+        """
+        every_axis = (0, 1, 2, 3)
+        center = np.mean(X_train, axis=every_axis)
+        spread = np.std(X_train, axis=every_axis)
+        print(center)
+        print(spread)
+        # The small epsilon guards against division by zero.
+        scale = spread + 1e-7
+        return (X_train - center) / scale, (X_test - center) / scale
+
+    def normalize_production(self, x):
+        """Standardize x with fixed, previously recorded statistics.
+
+        The constants were produced during the first training run.
+        """
+        saved_mean = 121.936
+        saved_std = 68.389
+        return (x - saved_mean) / (saved_std + 1e-7)
+
+    def predict(self, x, normalize=True, batch_size=50):
+        """Run the model on x, normalizing it first unless told otherwise."""
+        inputs = self.normalize_production(x) if normalize else x
+        return self.model.predict(inputs, batch_size)
+
+    def train(self, model):
+        """Train model on CIFAR-100, save its weights and return it.
+
+        Uses SGD with Nesterov momentum on augmented batches for 4 epochs.
+        The schedule halves the learning rate every 20 epochs, so with only
+        4 epochs the rate stays at its initial value. Weights are written
+        to 'cifar100vgg.h5'.
+        """
+        # Training parameters.
+        batch_size = 128
+        num_epochs = 4
+        base_lr = 0.05
+        lr_decay = 1e-6
+        halve_every = 20
+
+        # The data, shuffled and split between train and test sets.
+        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+        x_train, x_test = self.normalize(x_train.astype('float32'),
+                                         x_test.astype('float32'))
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+        def step_decay(epoch):
+            return base_lr * (0.5 ** (epoch // halve_every))
+
+        lr_callback = keras.callbacks.LearningRateScheduler(step_decay)
+
+        # Data augmentation: small rotations, shifts and horizontal flips.
+        augmenter = ImageDataGenerator(
+            featurewise_center=False,
+            samplewise_center=False,
+            featurewise_std_normalization=False,
+            samplewise_std_normalization=False,
+            zca_whitening=False,
+            rotation_range=15,
+            width_shift_range=0.1,
+            height_shift_range=0.1,
+            horizontal_flip=True,
+            vertical_flip=False)
+        augmenter.fit(x_train)
+
+        # Optimization details.
+        optimizer = optimizers.SGD(lr=base_lr, decay=lr_decay,
+                                   momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy',
+                      optimizer=optimizer,
+                      metrics=['accuracy'])
+
+        batches = augmenter.flow(x_train, y_train, batch_size=batch_size)
+        model.fit_generator(batches,
+                            steps_per_epoch=x_train.shape[0] // batch_size,
+                            epochs=num_epochs,
+                            validation_data=(x_test, y_test),
+                            callbacks=[lr_callback],
+                            verbose=2)
+        model.save_weights('cifar100vgg.h5')
+        return model
+
+if __name__ == '__main__':
+    # Script entry point: train the CIFAR-100 VGG model and dump normalized
+    # training data plus labels through dumpCalibrationData, then exit.
+
+    # Switch Keras to channels-first tensors before the model is built
+    # (the model's x_shape is [3, 32, 32]).
+    K.set_image_data_format('channels_first')
+    # Expose only the CUDA device with index 1 to this process.
+    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+
+    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+    # Keep the integer labels; y_train / y_test become one-hot below.
+    test_labels = y_test
+    train_labels = y_train
+
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+
+    y_train = keras.utils.to_categorical(y_train, 100)
+    y_test = keras.utils.to_categorical(y_test, 100)
+
+    # The default train=True builds and trains the network in the constructor.
+    model = cifar100vgg()
+
+    # predict() normalizes x_test with the fixed production constants.
+    predicted_x = model.predict(x_test)
+
+    norm_test = model.normalize_production(x_test)
+
+    # The training data is normalized with the same fixed constants before
+    # being dumped.
+    x_train = model.normalize_production(x_train)
+    
+    dumpCalibrationData("calibration_data/vgg16_cifar100_calib.bin", x_train,
+                        "calibration_data/vgg16_cifar100_train_labels.bin", train_labels)
+    # NOTE(review): the script stops here, so the translation and the 0/1
+    # loss report below never run. Presumably a leftover from producing the
+    # calibration dump -- confirm whether the exit is still wanted.
+    sys.exit(0)
+ 
+    
+    # Unreachable while the sys.exit(0) above is in place.
+    translate_to_approxhpvm(model.model, "vgg16_cifar100_test/", norm_test, test_labels, 
+                            "vgg16_cifar100_front", y_test)
+
+    
+    # True wherever the predicted class differs from the true class.
+    residuals = (np.argmax(predicted_x,1)!=np.argmax(y_test,1))
+    # Fraction of misclassified test samples.
+    loss = sum(residuals)/len(residuals)
+    print("the validation 0/1 loss is: ",loss)
+
+
diff --git a/hpvm/projects/onnx/src/lenet_keras.ipynb b/hpvm/projects/onnx/src/lenet_keras.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..bd83db10050f74988dc313991d06b99e247cf011
--- /dev/null
+++ b/hpvm/projects/onnx/src/lenet_keras.ipynb
@@ -0,0 +1,199 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "import keras\n",
+    "from keras.datasets import mnist\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Dropout, Flatten, Activation\n",
+    "from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D\n",
+    "from keras import backend as K"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 128\n",
+    "num_classes = 10\n",
+    "# input image dimensions\n",
+    "img_rows, img_cols = 28, 28  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Changing Keras data format to NCHW - NHWC is default\n",
+    "# NOTE: ApproxHPVM requires NCHW format\n",
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "# Loads Mnist dataset\n",
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
+    "test_labels = y_test\n",
+    "\n",
+    "# Reshaping data to be NCHW format  \n",
+    "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n",
+    "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n",
+    "input_shape = (1, img_rows, img_cols)\n",
+    "\n",
+    "\n",
+    "# Data Normalization \n",
+    "x_train = x_train.astype('float32')\n",
+    "x_test = x_test.astype('float32')\n",
+    "x_train /= 255\n",
+    "x_test /= 255\n",
+    "\n",
+    "\n",
+    "# convert class vectors to binary class matrices - required by Keras\n",
+    "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
+    "y_test = keras.utils.to_categorical(y_test, num_classes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train on 60000 samples, validate on 10000 samples\n",
+      "Epoch 1/5\n",
+      "60000/60000 [==============================] - 8s 131us/step - loss: 0.2878 - acc: 0.9072 - val_loss: 0.0514 - val_acc: 0.9814\n",
+      "Epoch 2/5\n",
+      "60000/60000 [==============================] - 5s 83us/step - loss: 0.0459 - acc: 0.9854 - val_loss: 0.0337 - val_acc: 0.9888\n",
+      "Epoch 3/5\n",
+      "60000/60000 [==============================] - 8s 127us/step - loss: 0.0290 - acc: 0.9908 - val_loss: 0.0275 - val_acc: 0.9902\n",
+      "Epoch 4/5\n",
+      "60000/60000 [==============================] - 9s 151us/step - loss: 0.0195 - acc: 0.9940 - val_loss: 0.0367 - val_acc: 0.9883\n",
+      "Epoch 5/5\n",
+      "60000/60000 [==============================] - 10s 169us/step - loss: 0.0136 - acc: 0.9960 - val_loss: 0.0318 - val_acc: 0.9905\n",
+      "Test loss: 0.031750623502753844\n",
+      "Test accuracy: 0.9905\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Network Composition: 3 Conv Layers, 2 Dense Layers\n",
+    "model = Sequential()\n",
+    "\n",
+    "# ConvLayer1\n",
+    "model.add(Conv2D(32, kernel_size=(5, 5),\n",
+    "                 activation='relu',\n",
+    "                 padding = 'same',\n",
+    "                 input_shape=input_shape))\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "\n",
+    "# ConvLayer2\n",
+    "model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same'))\n",
+    "\n",
+    "# ConvLayer3\n",
+    "# NOTE: ZeroPadding needed for ConvLayer with strides > 1\n",
+    "model.add(ZeroPadding2D(padding = (1,1)))\n",
+    "model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') )\n",
+    "\n",
+    "model.add(Flatten())\n",
+    "# DenseLayer1\n",
+    "model.add(Dense(1024, activation='relu'))\n",
+    "# DenseLayer2\n",
+    "model.add(Dense(num_classes, activation='relu'))\n",
+    "# Softmax Layer\n",
+    "model.add(Activation('softmax'))\n",
+    "\n",
+    "\n",
+    "# Configures model for training    \n",
+    "model.compile(loss=keras.losses.categorical_crossentropy,\n",
+    "                            optimizer=keras.optimizers.Adadelta(),\n",
+    "                            metrics=['accuracy'])\n",
+    "\n",
+    "# Training\n",
+    "model.fit(x_train, y_train,\n",
+    "                    batch_size=batch_size,\n",
+    "                    epochs=5,\n",
+    "                    verbose=1,\n",
+    "                    validation_data=(x_test, y_test))\n",
+    "\n",
+    "\n",
+    "# Inference\n",
+    "score = model.evaluate(x_test, y_test, verbose=0)\n",
+    "print('Test loss:', score[0])\n",
+    "print('Test accuracy:', score[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time: 1.037928581237793\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "start = time.time()\n",
+    "keras_result = model.predict(x_test[:8000])\n",
+    "print(\"time:\", time.time() - start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras2onnx\n",
+    "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n",
+    "import onnx\n",
+    "onnx.save(onnx_model, \"../models/keras/lenet.onnx\")\n",
+    "import pickle\n",
+    "with open('dumps/lenet_keras_dump', 'wb') as fp:\n",
+    "    pickle.dump(keras_result, fp)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/lenet_onnx.ipynb b/hpvm/projects/onnx/src/lenet_onnx.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..fdf69ebc3cde8c1e12bee99bcde574786ffd0c18
--- /dev/null
+++ b/hpvm/projects/onnx/src/lenet_onnx.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import keras\n",
+    "from keras.datasets import mnist\n",
+    "from keras import backend as K\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import keras2onnx\n",
+    "import onnx\n",
+    "import onnxruntime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 128\n",
+    "num_classes = 10\n",
+    "# input image dimensions\n",
+    "img_rows, img_cols = 28, 28  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Changing Keras data format to NCHW - NHWC is default\n",
+    "# NOTE: ApproxHPVM requires NCHW format\n",
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "# Loads Mnist dataset\n",
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
+    "test_labels = y_test\n",
+    "\n",
+    "# Reshaping data to be NCHW format  \n",
+    "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n",
+    "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n",
+    "input_shape = (1, img_rows, img_cols)\n",
+    "\n",
+    "\n",
+    "# Data Normalization \n",
+    "x_train = x_train.astype('float32')\n",
+    "x_test = x_test.astype('float32')\n",
+    "x_train /= 255\n",
+    "x_test /= 255\n",
+    "\n",
+    "\n",
+    "# convert class vectors to binary class matrices - required by Keras\n",
+    "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
+    "y_test = keras.utils.to_categorical(y_test, num_classes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sess = onnxruntime.InferenceSession(\"../models/keras/lenet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input name  : conv2d_1_input\n",
+      "Input shape : [None, 1, 28, 28]\n",
+      "Input type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = sess.get_inputs()[0].name\n",
+    "print(\"Input name  :\", input_name)\n",
+    "input_shape = sess.get_inputs()[0].shape\n",
+    "print(\"Input shape :\", input_shape)\n",
+    "input_type = sess.get_inputs()[0].type\n",
+    "print(\"Input type  :\", input_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Output name  : activation_1\n",
+      "Output shape : [None, 10]\n",
+      "Output type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "output_name = sess.get_outputs()[0].name\n",
+    "print(\"Output name  :\", output_name)  \n",
+    "output_shape = sess.get_outputs()[0].shape\n",
+    "print(\"Output shape :\", output_shape)\n",
+    "output_type = sess.get_outputs()[0].type\n",
+    "print(\"Output type  :\", output_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "start = time.time()\n",
+    "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n",
+    "print(\"time:\", time.time() - start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "with open('dumps/lenet_ort_dump', 'wb') as fp:\n",
+    "    pickle.dump(ort_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/lenet_ort_dump', 'rb') as fp:\n",
+    "    ort_res = pickle.load(fp)\n",
+    "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(ort_res[0], keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/lenet_tvm.ipynb b/hpvm/projects/onnx/src/lenet_tvm.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..274553dcc52d2b84f601a4480b04adc54a1ac0ea
--- /dev/null
+++ b/hpvm/projects/onnx/src/lenet_tvm.ipynb
@@ -0,0 +1,241 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import mnist\n",
+    "from keras import backend as K\n",
+    "import keras\n",
+    "import sys\n",
+    "import struct\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import tvm\n",
+    "import tvm.relay as relay\n",
+    "from tvm.contrib import graph_runtime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 128\n",
+    "num_classes = 10\n",
+    "# input image dimensions\n",
+    "img_rows, img_cols = 28, 28 "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Changing Keras data format to NCHW - NHWC is default\n",
+    "# NOTE: ApproxHPVM requires NCHW format\n",
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "# Loads Mnist dataset\n",
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
+    "test_labels = y_test\n",
+    "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n",
+    "input_shape = (1, img_rows, img_cols)\n",
+    "x_test = x_test.astype('float32')\n",
+    "x_test /= 255"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import onnx\n",
+    "onnx_model = onnx.load(\"../models/keras/lenet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n"
+     ]
+    }
+   ],
+   "source": [
+    "target='cuda -libs=cudnn,cublas'\n",
+    "input_name = 'conv2d_1_input'\n",
+    "shape_dict = {input_name: x_test.shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "ctx = tvm.gpu()\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    executor = relay.build_module.create_executor('graph', mod, ctx, target)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    }
+   ],
+   "source": [
+    "tvm_out = executor.evaluate()(tvm.nd.array(x_test), **params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = 'conv2d_1_input'\n",
+    "input_size = 8000\n",
+    "shape_dict = {input_name: x_test[:input_size].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "target = 'cuda -libs=cudnn,cublas'\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    graph, lib, params = relay.build(mod, target, params=params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Time: 0.043964385986328125\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "ctx = tvm.gpu()\n",
+    "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n",
+    "# create module\n",
+    "module = graph_runtime.create(graph, lib, ctx)\n",
+    "# set input and parameters\n",
+    "module.set_input(\"conv2d_1_input\", x_test[:input_size])\n",
+    "module.set_input(**params)\n",
+    "# run\n",
+    "start_time = time.time()\n",
+    "module.run()\n",
+    "out_shape = (input_size, 10)\n",
+    "# get output\n",
+    "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n",
+    "print(\"Time:\", time.time() - start_time)\n",
+    "#ftimer = module.module.time_evaluator(\"get_output\", ctx, number=1)\n",
+    "#prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond\n",
+    "#print(\"%-20s %-19s (%s)\" % (\"lenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "with open('dumps/lenet_tvm_dump', 'wb') as fp:\n",
+    "    pickle.dump(out, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/lenet_tvm_dump', 'rb') as fp:\n",
+    "    tvm_res = pickle.load(fp)\n",
+    "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(tvm_res, keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/mobilenet_keras.ipynb b/hpvm/projects/onnx/src/mobilenet_keras.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..5175bc2aef1403fec6a70ffb0cdd504a062d5f96
--- /dev/null
+++ b/hpvm/projects/onnx/src/mobilenet_keras.ipynb
@@ -0,0 +1,592 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import *\n",
+    "from keras.datasets import cifar10\n",
+    "from keras.utils import to_categorical\n",
+    "from keras.callbacks import *\n",
+    "from keras.preprocessing.image import ImageDataGenerator\n",
+    "from keras.models import Model\n",
+    "from keras import optimizers\n",
+    "import keras.backend as K\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "X_train.shape =  (50000, 3, 32, 32)\n",
+      "X_test.shape =  (10000, 3, 32, 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n",
+    "test_labels = y_test\n",
+    "\n",
+    "print (\"X_train.shape = \", X_train.shape)\n",
+    "print (\"X_test.shape = \", X_test.shape)\n",
+    "\n",
+    "\n",
+    "X_train = X_train.astype('float32')\n",
+    "X_test = X_test.astype('float32')\n",
+    "\n",
+    "\n",
+    "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "\n",
+    "X_train = (X_train - mean) / (std + 1e-9)\n",
+    "X_test = (X_test - mean) / (std + 1e-9)\n",
+    "\n",
+    "y_train = to_categorical(y_train, num_classes=10)\n",
+    "y_test = to_categorical(y_test, num_classes=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_mobilenet(alpha=1, depth_multiplier=1):\n",
+    "    model = Sequential()\n",
+    "    \n",
+    "    def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):\n",
+    "        channel_axis = 1\n",
+    "        \n",
+    "        model.add(Conv2D(filters, kernel,\n",
+    "                          padding='same',\n",
+    "                          use_bias=False,\n",
+    "                          strides=strides, \n",
+    "                          input_shape=(3, 32, 32)))\n",
+    "        model.add(BatchNormalization(axis=channel_axis))\n",
+    "        model.add(Activation('relu'))\n",
+    "    \n",
+    "    def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):\n",
+    "        channel_axis = 1 \n",
+    "\n",
+    "        model.add(ZeroPadding2D(padding = ((1,1), (1,1) )))\n",
+    "\n",
+    "        model.add(DepthwiseConv2D((3, 3),\n",
+    "                                   padding='valid',\n",
+    "                                   #depth_multiplier=depth_multiplier,\n",
+    "                                   strides=strides,\n",
+    "                                   use_bias=False))    \n",
+    "        model.add(BatchNormalization(axis=channel_axis))\n",
+    "        \n",
+    "        model.add(Activation('relu'))\n",
+    "        model.add(Conv2D(pointwise_conv_filters, (1, 1),\n",
+    "                          padding='same',\n",
+    "                          use_bias=False,\n",
+    "                          strides=(1, 1)))\n",
+    "        model.add(BatchNormalization(axis=channel_axis))\n",
+    "        model.add(Activation('relu'))\n",
+    "\n",
+    "\n",
+    "    _conv_block(32, alpha, strides=(1, 1))\n",
+    "    \n",
+    "    _depthwise_conv_block(64, alpha, depth_multiplier)\n",
+    "    \n",
+    "    _depthwise_conv_block(128, alpha, depth_multiplier,\n",
+    "                              strides=(2, 2))\n",
+    "    _depthwise_conv_block(128, alpha, depth_multiplier)\n",
+    "    model.add(Dropout(rate=0.5))\n",
+    "\n",
+    "    _depthwise_conv_block(256, alpha, depth_multiplier, \n",
+    "                      strides=(2, 2))\n",
+    "    _depthwise_conv_block(256, alpha, depth_multiplier)\n",
+    "    model.add(Dropout(rate=0.5))\n",
+    "\n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier,\n",
+    "                      strides=(2, 2))\n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier)\n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier)\n",
+    "    model.add(Dropout(rate=0.5))\n",
+    "    \n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier)\n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier)\n",
+    "    _depthwise_conv_block(512, alpha, depth_multiplier)\n",
+    "    model.add(Dropout(rate=0.5))\n",
+    "    \n",
+    "    _depthwise_conv_block(1024, alpha, depth_multiplier,\n",
+    "                         strides=(2, 2))\n",
+    "    _depthwise_conv_block(1024, alpha, depth_multiplier)\n",
+    "    model.add(Dropout(rate=0.5))\n",
+    "\n",
+    "    model.add(AveragePooling2D(pool_size=2))\n",
+    "    model.add(Flatten())\n",
+    "    model.add(Dense(10, activation='softmax'))\n",
+    "\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/50\n",
+      "391/391 [==============================] - 36s 92ms/step - loss: 2.0463 - acc: 0.2513 - val_loss: 2.2584 - val_acc: 0.2776\n",
+      "Epoch 2/50\n",
+      "391/391 [==============================] - 31s 80ms/step - loss: 1.6583 - acc: 0.3977 - val_loss: 1.9809 - val_acc: 0.3528\n",
+      "Epoch 3/50\n",
+      "391/391 [==============================] - 32s 81ms/step - loss: 1.4632 - acc: 0.4740 - val_loss: 1.6017 - val_acc: 0.4475\n",
+      "Epoch 4/50\n",
+      "391/391 [==============================] - 32s 82ms/step - loss: 1.3280 - acc: 0.5213 - val_loss: 1.3650 - val_acc: 0.5158\n",
+      "Epoch 5/50\n",
+      "391/391 [==============================] - 32s 82ms/step - loss: 1.1912 - acc: 0.5727 - val_loss: 1.2064 - val_acc: 0.5807\n",
+      "Epoch 6/50\n",
+      "391/391 [==============================] - 31s 80ms/step - loss: 1.0700 - acc: 0.6203 - val_loss: 1.1014 - val_acc: 0.6085\n",
+      "Epoch 7/50\n",
+      "391/391 [==============================] - 31s 80ms/step - loss: 1.0232 - acc: 0.6350 - val_loss: 1.0276 - val_acc: 0.6331\n",
+      "Epoch 8/50\n",
+      "391/391 [==============================] - 31s 79ms/step - loss: 0.9783 - acc: 0.6498 - val_loss: 1.0276 - val_acc: 0.6398\n",
+      "Epoch 9/50\n",
+      "391/391 [==============================] - 31s 78ms/step - loss: 0.9359 - acc: 0.6673 - val_loss: 1.0931 - val_acc: 0.6226\n",
+      "Epoch 10/50\n",
+      "391/391 [==============================] - 30s 76ms/step - loss: 0.9058 - acc: 0.6776 - val_loss: 1.1184 - val_acc: 0.6105\n",
+      "Epoch 11/50\n",
+      "391/391 [==============================] - 30s 76ms/step - loss: 0.8781 - acc: 0.6884 - val_loss: 1.0405 - val_acc: 0.6498\n",
+      "Epoch 12/50\n",
+      "391/391 [==============================] - 29s 74ms/step - loss: 0.8502 - acc: 0.6998 - val_loss: 1.3313 - val_acc: 0.5514\n",
+      "Epoch 13/50\n",
+      "391/391 [==============================] - 29s 75ms/step - loss: 0.8211 - acc: 0.7077 - val_loss: 1.0371 - val_acc: 0.6443\n",
+      "Epoch 14/50\n",
+      "391/391 [==============================] - 28s 73ms/step - loss: 0.7958 - acc: 0.7181 - val_loss: 0.8624 - val_acc: 0.7061\n",
+      "Epoch 15/50\n",
+      "391/391 [==============================] - 29s 74ms/step - loss: 0.7720 - acc: 0.7257 - val_loss: 0.8090 - val_acc: 0.7224\n",
+      "Epoch 16/50\n",
+      "391/391 [==============================] - 29s 74ms/step - loss: 0.7508 - acc: 0.7324 - val_loss: 0.7846 - val_acc: 0.7272\n",
+      "Epoch 17/50\n",
+      "391/391 [==============================] - 30s 76ms/step - loss: 0.7292 - acc: 0.7432 - val_loss: 0.7717 - val_acc: 0.7323\n",
+      "Epoch 18/50\n",
+      "391/391 [==============================] - 33s 85ms/step - loss: 0.7047 - acc: 0.7511 - val_loss: 0.7407 - val_acc: 0.7453\n",
+      "Epoch 19/50\n",
+      "391/391 [==============================] - 33s 85ms/step - loss: 0.6884 - acc: 0.7570 - val_loss: 0.7040 - val_acc: 0.7537\n",
+      "Epoch 20/50\n",
+      "391/391 [==============================] - 33s 85ms/step - loss: 0.6718 - acc: 0.7652 - val_loss: 0.9103 - val_acc: 0.6907\n",
+      "Epoch 21/50\n",
+      "391/391 [==============================] - 33s 85ms/step - loss: 0.6489 - acc: 0.7722 - val_loss: 0.7054 - val_acc: 0.7567\n",
+      "Epoch 22/50\n",
+      "391/391 [==============================] - 33s 84ms/step - loss: 0.6330 - acc: 0.7771 - val_loss: 0.7032 - val_acc: 0.7587\n",
+      "Epoch 23/50\n",
+      "391/391 [==============================] - 33s 86ms/step - loss: 0.6157 - acc: 0.7831 - val_loss: 0.6955 - val_acc: 0.7652\n",
+      "Epoch 24/50\n",
+      "391/391 [==============================] - 32s 83ms/step - loss: 0.6027 - acc: 0.7882 - val_loss: 0.7599 - val_acc: 0.7419\n",
+      "Epoch 25/50\n",
+      "391/391 [==============================] - 32s 83ms/step - loss: 0.5910 - acc: 0.7932 - val_loss: 0.6315 - val_acc: 0.7830\n",
+      "Epoch 26/50\n",
+      "391/391 [==============================] - 33s 84ms/step - loss: 0.5743 - acc: 0.7967 - val_loss: 0.6595 - val_acc: 0.7772\n",
+      "Epoch 27/50\n",
+      "391/391 [==============================] - 32s 82ms/step - loss: 0.5610 - acc: 0.8027 - val_loss: 0.6628 - val_acc: 0.7691\n",
+      "Epoch 28/50\n",
+      "391/391 [==============================] - 31s 80ms/step - loss: 0.5508 - acc: 0.8074 - val_loss: 0.6211 - val_acc: 0.7820\n",
+      "Epoch 29/50\n",
+      "391/391 [==============================] - 31s 81ms/step - loss: 0.5293 - acc: 0.8130 - val_loss: 0.6221 - val_acc: 0.7852\n",
+      "Epoch 30/50\n",
+      "391/391 [==============================] - 30s 78ms/step - loss: 0.5205 - acc: 0.8158 - val_loss: 0.6252 - val_acc: 0.7891\n",
+      "Epoch 31/50\n",
+      "391/391 [==============================] - 31s 79ms/step - loss: 0.5128 - acc: 0.8195 - val_loss: 0.6213 - val_acc: 0.7844\n",
+      "Epoch 32/50\n",
+      "391/391 [==============================] - 29s 75ms/step - loss: 0.4988 - acc: 0.8241 - val_loss: 0.5745 - val_acc: 0.8046\n",
+      "Epoch 33/50\n",
+      "391/391 [==============================] - 30s 76ms/step - loss: 0.4898 - acc: 0.8273 - val_loss: 0.5938 - val_acc: 0.8015\n",
+      "Epoch 34/50\n",
+      "391/391 [==============================] - 28s 72ms/step - loss: 0.4789 - acc: 0.8294 - val_loss: 0.5693 - val_acc: 0.8074\n",
+      "Epoch 35/50\n",
+      "391/391 [==============================] - 28s 72ms/step - loss: 0.4646 - acc: 0.8371 - val_loss: 0.5743 - val_acc: 0.8042\n",
+      "Epoch 36/50\n",
+      "391/391 [==============================] - 29s 74ms/step - loss: 0.4605 - acc: 0.8364 - val_loss: 0.6270 - val_acc: 0.7865\n",
+      "Epoch 37/50\n",
+      "391/391 [==============================] - 33s 84ms/step - loss: 0.4499 - acc: 0.8405 - val_loss: 0.6014 - val_acc: 0.7960\n",
+      "Epoch 38/50\n",
+      "391/391 [==============================] - 32s 83ms/step - loss: 0.4459 - acc: 0.8438 - val_loss: 0.6058 - val_acc: 0.7971\n",
+      "Epoch 39/50\n",
+      "391/391 [==============================] - 33s 84ms/step - loss: 0.4341 - acc: 0.8473 - val_loss: 0.5551 - val_acc: 0.8125\n",
+      "Epoch 40/50\n",
+      "391/391 [==============================] - 33s 83ms/step - loss: 0.4238 - acc: 0.8496 - val_loss: 0.5445 - val_acc: 0.8165\n",
+      "Epoch 41/50\n",
+      "391/391 [==============================] - 33s 83ms/step - loss: 0.4160 - acc: 0.8516 - val_loss: 0.5873 - val_acc: 0.8058\n",
+      "Epoch 42/50\n",
+      "391/391 [==============================] - 32s 83ms/step - loss: 0.4048 - acc: 0.8546 - val_loss: 0.6693 - val_acc: 0.7822\n",
+      "Epoch 43/50\n",
+      "391/391 [==============================] - 33s 83ms/step - loss: 0.4021 - acc: 0.8583 - val_loss: 0.5408 - val_acc: 0.8168\n",
+      "Epoch 44/50\n",
+      "391/391 [==============================] - 33s 84ms/step - loss: 0.3927 - acc: 0.8610 - val_loss: 0.5538 - val_acc: 0.8117\n",
+      "Epoch 45/50\n",
+      "391/391 [==============================] - 33s 83ms/step - loss: 0.3904 - acc: 0.8615 - val_loss: 0.5981 - val_acc: 0.8063\n",
+      "Epoch 46/50\n",
+      "391/391 [==============================] - 32s 81ms/step - loss: 0.3811 - acc: 0.8642 - val_loss: 0.5285 - val_acc: 0.8223\n",
+      "Epoch 47/50\n",
+      "391/391 [==============================] - 32s 83ms/step - loss: 0.3726 - acc: 0.8676 - val_loss: 0.5317 - val_acc: 0.8206\n",
+      "Epoch 48/50\n",
+      "391/391 [==============================] - 31s 79ms/step - loss: 0.3661 - acc: 0.8692 - val_loss: 0.5568 - val_acc: 0.8172\n",
+      "Epoch 49/50\n",
+      "391/391 [==============================] - 31s 79ms/step - loss: 0.3582 - acc: 0.8711 - val_loss: 0.5057 - val_acc: 0.8320\n",
+      "Epoch 50/50\n",
+      "391/391 [==============================] - 29s 75ms/step - loss: 0.3516 - acc: 0.8740 - val_loss: 0.5760 - val_acc: 0.8099\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "conv2d_1 (Conv2D)            (None, 32, 32, 32)        864       \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_1 (Batch (None, 32, 32, 32)        128       \n",
+      "_________________________________________________________________\n",
+      "activation_1 (Activation)    (None, 32, 32, 32)        0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_1 (ZeroPaddin (None, 32, 34, 34)        0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_1 (Depthwis (None, 32, 32, 32)        288       \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_2 (Batch (None, 32, 32, 32)        128       \n",
+      "_________________________________________________________________\n",
+      "activation_2 (Activation)    (None, 32, 32, 32)        0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_2 (Conv2D)            (None, 64, 32, 32)        2048      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_3 (Batch (None, 64, 32, 32)        256       \n",
+      "_________________________________________________________________\n",
+      "activation_3 (Activation)    (None, 64, 32, 32)        0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_2 (ZeroPaddin (None, 64, 34, 34)        0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_2 (Depthwis (None, 64, 16, 16)        576       \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_4 (Batch (None, 64, 16, 16)        256       \n",
+      "_________________________________________________________________\n",
+      "activation_4 (Activation)    (None, 64, 16, 16)        0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_3 (Conv2D)            (None, 128, 16, 16)       8192      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_5 (Batch (None, 128, 16, 16)       512       \n",
+      "_________________________________________________________________\n",
+      "activation_5 (Activation)    (None, 128, 16, 16)       0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_3 (ZeroPaddin (None, 128, 18, 18)       0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_3 (Depthwis (None, 128, 16, 16)       1152      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_6 (Batch (None, 128, 16, 16)       512       \n",
+      "_________________________________________________________________\n",
+      "activation_6 (Activation)    (None, 128, 16, 16)       0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_4 (Conv2D)            (None, 128, 16, 16)       16384     \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_7 (Batch (None, 128, 16, 16)       512       \n",
+      "_________________________________________________________________\n",
+      "activation_7 (Activation)    (None, 128, 16, 16)       0         \n",
+      "_________________________________________________________________\n",
+      "dropout_1 (Dropout)          (None, 128, 16, 16)       0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_4 (ZeroPaddin (None, 128, 18, 18)       0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_4 (Depthwis (None, 128, 8, 8)         1152      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_8 (Batch (None, 128, 8, 8)         512       \n",
+      "_________________________________________________________________\n",
+      "activation_8 (Activation)    (None, 128, 8, 8)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_5 (Conv2D)            (None, 256, 8, 8)         32768     \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_9 (Batch (None, 256, 8, 8)         1024      \n",
+      "_________________________________________________________________\n",
+      "activation_9 (Activation)    (None, 256, 8, 8)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_5 (ZeroPaddin (None, 256, 10, 10)       0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_5 (Depthwis (None, 256, 8, 8)         2304      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_10 (Batc (None, 256, 8, 8)         1024      \n",
+      "_________________________________________________________________\n",
+      "activation_10 (Activation)   (None, 256, 8, 8)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_6 (Conv2D)            (None, 256, 8, 8)         65536     \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_11 (Batc (None, 256, 8, 8)         1024      \n",
+      "_________________________________________________________________\n",
+      "activation_11 (Activation)   (None, 256, 8, 8)         0         \n",
+      "_________________________________________________________________\n",
+      "dropout_2 (Dropout)          (None, 256, 8, 8)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_6 (ZeroPaddin (None, 256, 10, 10)       0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_6 (Depthwis (None, 256, 4, 4)         2304      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_12 (Batc (None, 256, 4, 4)         1024      \n",
+      "_________________________________________________________________\n",
+      "activation_12 (Activation)   (None, 256, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_7 (Conv2D)            (None, 512, 4, 4)         131072    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_13 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_13 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_7 (ZeroPaddin (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_7 (Depthwis (None, 512, 4, 4)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_14 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_14 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_8 (Conv2D)            (None, 512, 4, 4)         262144    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_15 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_15 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_8 (ZeroPaddin (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_8 (Depthwis (None, 512, 4, 4)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_16 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_16 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_9 (Conv2D)            (None, 512, 4, 4)         262144    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_17 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_17 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "dropout_3 (Dropout)          (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_9 (ZeroPaddin (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_9 (Depthwis (None, 512, 4, 4)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_18 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_18 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_10 (Conv2D)           (None, 512, 4, 4)         262144    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_19 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_19 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_10 (ZeroPaddi (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_10 (Depthwi (None, 512, 4, 4)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_20 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_20 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_11 (Conv2D)           (None, 512, 4, 4)         262144    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_21 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_21 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_11 (ZeroPaddi (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_11 (Depthwi (None, 512, 4, 4)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_22 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_22 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_12 (Conv2D)           (None, 512, 4, 4)         262144    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_23 (Batc (None, 512, 4, 4)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_23 (Activation)   (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "dropout_4 (Dropout)          (None, 512, 4, 4)         0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_12 (ZeroPaddi (None, 512, 6, 6)         0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_12 (Depthwi (None, 512, 2, 2)         4608      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_24 (Batc (None, 512, 2, 2)         2048      \n",
+      "_________________________________________________________________\n",
+      "activation_24 (Activation)   (None, 512, 2, 2)         0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_13 (Conv2D)           (None, 1024, 2, 2)        524288    \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_25 (Batc (None, 1024, 2, 2)        4096      \n",
+      "_________________________________________________________________\n",
+      "activation_25 (Activation)   (None, 1024, 2, 2)        0         \n",
+      "_________________________________________________________________\n",
+      "zero_padding2d_13 (ZeroPaddi (None, 1024, 4, 4)        0         \n",
+      "_________________________________________________________________\n",
+      "depthwise_conv2d_13 (Depthwi (None, 1024, 2, 2)        9216      \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_26 (Batc (None, 1024, 2, 2)        4096      \n",
+      "_________________________________________________________________\n",
+      "activation_26 (Activation)   (None, 1024, 2, 2)        0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_14 (Conv2D)           (None, 1024, 2, 2)        1048576   \n",
+      "_________________________________________________________________\n",
+      "batch_normalization_27 (Batc (None, 1024, 2, 2)        4096      \n",
+      "_________________________________________________________________\n",
+      "activation_27 (Activation)   (None, 1024, 2, 2)        0         \n",
+      "_________________________________________________________________\n",
+      "dropout_5 (Dropout)          (None, 1024, 2, 2)        0         \n",
+      "_________________________________________________________________\n",
+      "average_pooling2d_1 (Average (None, 1024, 1, 1)        0         \n",
+      "_________________________________________________________________\n",
+      "flatten_1 (Flatten)          (None, 1024)              0         \n",
+      "_________________________________________________________________\n",
+      "dense_1 (Dense)              (None, 10)                10250     \n",
+      "=================================================================\n",
+      "Total params: 3,239,114\n",
+      "Trainable params: 3,217,226\n",
+      "Non-trainable params: 21,888\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "# data augmentation, horizontal flips only\n",
+    "datagen = ImageDataGenerator(\n",
+    "        featurewise_center=False,\n",
+    "        featurewise_std_normalization=False,\n",
+    "        rotation_range=0.0,\n",
+    "        width_shift_range=0.0,\n",
+    "        height_shift_range=0.0,\n",
+    "        vertical_flip=False,\n",
+    "        horizontal_flip=True)\n",
+    "datagen.fit(X_train)\n",
+    "\n",
+    "model = get_mobilenet()\n",
+    "\n",
+    "learning_rates=[]\n",
+    "for i in range(5):\n",
+    "    learning_rates.append(2e-2)\n",
+    "for i in range(50-5):\n",
+    "    learning_rates.append(1e-2)\n",
+    "for i in range(100-50):\n",
+    "    learning_rates.append(8e-3)\n",
+    "for i in range(150-100):\n",
+    "    learning_rates.append(4e-3)\n",
+    "for i in range(200-150):\n",
+    "    learning_rates.append(2e-3)\n",
+    "for i in range(300-200):\n",
+    "    learning_rates.append(1e-3)\n",
+    "\n",
+    "callbacks = [\n",
+    "    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))\n",
+    "]\n",
+    "\n",
+    "model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), \n",
+    "                       loss='categorical_crossentropy', \n",
+    "                       metrics=['accuracy'])\n",
+    "\n",
+    "model.fit_generator(\n",
+    "    datagen.flow(X_train, y_train, batch_size=128),\n",
+    "    steps_per_epoch=int(np.ceil(50000 / 128)),\n",
+    "    validation_data=(X_test, y_test),\n",
+    "    #epochs=300,\n",
+    "    epochs=50,\n",
+    "    callbacks=callbacks\n",
+    ")\n",
+    "\n",
+    "model.summary()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time:  0.012634992599487305\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "start_time = time.time()\n",
+    "keras_result = model.predict(X_test[:20])\n",
+    "print(\"time: \", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras2onnx\n",
+    "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n",
+    "import onnx\n",
+    "onnx.save(onnx_model, \"../models/keras/mobilenet.onnx\")\n",
+    "import pickle\n",
+    "with open('dumps/mobilenet_keras_dump', 'wb') as fp:\n",
+    "    pickle.dump(keras_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/mobilenet_onnx.ipynb b/hpvm/projects/onnx/src/mobilenet_onnx.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8b26d865421ca5c819b32abeb295b0169254a055
--- /dev/null
+++ b/hpvm/projects/onnx/src/mobilenet_onnx.ipynb
@@ -0,0 +1,210 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "import keras\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "from keras.utils import to_categorical\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import keras2onnx\n",
+    "import onnx\n",
+    "import onnxruntime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "X_train.shape =  (50000, 3, 32, 32)\n",
+      "X_test.shape =  (10000, 3, 32, 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n",
+    "test_labels = y_test\n",
+    "\n",
+    "print (\"X_train.shape = \", X_train.shape)\n",
+    "print (\"X_test.shape = \", X_test.shape)\n",
+    "\n",
+    "\n",
+    "X_train = X_train.astype('float32')\n",
+    "X_test = X_test.astype('float32')\n",
+    "\n",
+    "\n",
+    "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "\n",
+    "X_train = (X_train - mean) / (std + 1e-9)\n",
+    "X_test = (X_test - mean) / (std + 1e-9)\n",
+    "\n",
+    "y_train = to_categorical(y_train, num_classes=10)\n",
+    "y_test = to_categorical(y_test, num_classes=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sess = onnxruntime.InferenceSession(\"../models/keras/mobilenet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input name  : conv2d_1_input\n",
+      "Input shape : [None, 3, 32, 32]\n",
+      "Input type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = sess.get_inputs()[0].name\n",
+    "print(\"Input name  :\", input_name)\n",
+    "input_shape = sess.get_inputs()[0].shape\n",
+    "print(\"Input shape :\", input_shape)\n",
+    "input_type = sess.get_inputs()[0].type\n",
+    "print(\"Input type  :\", input_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Output name  : dense_1\n",
+      "Output shape : [None, 10]\n",
+      "Output type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "output_name = sess.get_outputs()[0].name\n",
+    "print(\"Output name  :\", output_name)  \n",
+    "output_shape = sess.get_outputs()[0].shape\n",
+    "print(\"Output shape :\", output_shape)\n",
+    "output_type = sess.get_outputs()[0].type\n",
+    "print(\"Output type  :\", output_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time:  0.006893157958984375\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "start_time = time.time()\n",
+    "ort_result = sess.run([output_name], {input_name: X_test[:40]})\n",
+    "print(\"time: \", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "with open('dumps/mobilenet_ort_dump', 'wb') as fp:\n",
+    "    pickle.dump(ort_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/mobilenet_ort_dump', 'rb') as fp:\n",
+    "    ort_res = pickle.load(fp)\n",
+    "with open ('dumps/mobilenet_keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(ort_res[0], keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  # ONNX Runtime and Keras outputs match when compared with decimal=3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/mobilenet_tvm.ipynb b/hpvm/projects/onnx/src/mobilenet_tvm.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..c0b71588afdea134ed4a2aa675d2c6058772b0b0
--- /dev/null
+++ b/hpvm/projects/onnx/src/mobilenet_tvm.ipynb
@@ -0,0 +1,381 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "import sys\n",
+    "import struct\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import tvm\n",
+    "import tvm.relay as relay\n",
+    "from tvm.contrib import graph_runtime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n",
+    "X_test = X_test.astype('float32')\n",
+    "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n",
+    "\n",
+    "X_test = (X_test - mean) / (std + 1e-9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(X_test.shape)\n",
+    "#X_test = X_test[:8000]\n",
+    "print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#import keras\n",
+    "#model = keras.applications.mobilenet.MobileNet(input_shape=X_test[0].shape, include_top=False, weights=None)\n",
+    "#import keras2onnx\n",
+    "#onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n",
+    "import onnx\n",
+    "onnx_model = onnx.load(\"../models/keras/mobilenet.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target='cuda -libs=cudnn,cublas'\n",
+    "input_name = 'conv2d_1_input'\n",
+    "shape_dict = {input_name: X_test[:10].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "ctx = tvm.gpu()\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    executor = relay.build_module.create_executor('graph', mod, ctx, target)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# LLVM EXECUTE SUCCEEDED\n",
+    "import time\n",
+    "start_time = time.time()\n",
+    "tvm_out = executor.evaluate()(tvm.nd.array(X_test[:1000]), **params)\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n",
+    "import pickle\n",
+    "with open('dumps/tvm_dump', 'wb') as fp:\n",
+    "    pickle.dump(tvm_output, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.avg_pool2d\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (40, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 1024, 2, 2, 'float32'), (1024, 1024, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 1024, 2, 2, 'float32'), (1024, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 2, 2, 'float32'), (1024, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 4, 4, 'float32'), (512, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 4, 4, 'float32'), (512, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 8, 8, 'float32'), (256, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 8, 8, 'float32'), (256, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 16, 16, 'float32'), (128, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 64, 16, 16, 'float32'), (128, 64, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 64, 32, 32, 'float32'), (64, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 32, 32, 32, 'float32'), (64, 32, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 32, 32, 32, 'float32'), (32, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 3, 32, 32, 'float32'), (32, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = 'conv2d_1_input'\n",
+    "input_size = 40\n",
+    "shape_dict = {input_name: X_test[:input_size].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "target = 'cuda -libs=cudnn,cublas'\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    graph, lib, params = relay.build(mod, target, params=params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "ctx = tvm.gpu()\n",
+    "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n",
+    "# create module\n",
+    "module = graph_runtime.create(graph, lib, ctx)\n",
+    "# set input and parameters\n",
+    "module.set_input(\"conv2d_1_input\", X_test[:input_size])\n",
+    "module.set_input(**params)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Time: 0.012343168258666992\n"
+     ]
+    }
+   ],
+   "source": [
+    "# run\n",
+    "start_time = time.time()\n",
+    "module.run()\n",
+    "out_shape = (input_size, 10)\n",
+    "# get output\n",
+    "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "mobilenet            17.814301 ms        (0.00 ms)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# evaluate\n",
+    "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n",
+    "prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond\n",
+    "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.6f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/tvm_dump', 'rb') as fp:\n",
+    "    tvm_res = pickle.load(fp)\n",
+    "with open ('dumps/keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(tvm_res, keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test\n",
+    "print(\"Accuracy matched!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use TVM's implementation of mobilenet\n",
+    "import sys\n",
+    "import struct\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import tvm\n",
+    "import tvm.relay as relay\n",
+    "from tvm.relay import testing\n",
+    "from tvm.contrib import graph_runtime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 100\n",
+    "dtype = 'float32'\n",
+    "target='cuda -libs=cudnn,cublas'\n",
+    "net, params = testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)\n",
+    "input_shape = (batch_size, 3, 224, 224)\n",
+    "output_shape = (batch_size, 1000)\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    graph, lib, params = relay.build(net, target=target, params=params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ctx = tvm.context(str(target), 0)\n",
+    "module = graph_runtime.create(graph, lib, ctx)\n",
+    "data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))\n",
+    "module.set_input('data', data_tvm)\n",
+    "module.set_input(**params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# evaluate\n",
+    "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n",
+    "prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond\n",
+    "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/vgg16_keras.ipynb b/hpvm/projects/onnx/src/vgg16_keras.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e1d1422ead3c3bc5afa602a13b3087ecbebc8eab
--- /dev/null
+++ b/hpvm/projects/onnx/src/vgg16_keras.ipynb
@@ -0,0 +1,392 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import keras\n",
+    "from keras.datasets import cifar10\n",
+    "from keras.preprocessing.image import ImageDataGenerator\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Dropout, Activation, Flatten\n",
+    "from keras.layers import Conv2D, MaxPooling2D, BatchNormalization\n",
+    "from keras import optimizers\n",
+    "import numpy as np\n",
+    "from keras.layers.core import Lambda\n",
+    "from keras import backend as K\n",
+    "from keras import regularizers\n",
+    "import os\n",
+    "import sys"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class cifar10vgg:\n",
+    "    def __init__(self,train=True):\n",
+    "        self.num_classes = 10\n",
+    "        self.weight_decay = 0.0005\n",
+    "        self.x_shape = [3,32,32]\n",
+    "\n",
+    "        self.model = self.build_model()\n",
+    "        if train:\n",
+    "            self.model = self.train(self.model)\n",
+    "        else:\n",
+    "            self.model.load_weights('cifar10vgg.h5')\n",
+    "\n",
+    "\n",
+    "    def build_model(self):\n",
+    "        # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.\n",
+    "\n",
+    "        model = Sequential()\n",
+    "        weight_decay = self.weight_decay\n",
+    "\n",
+    "        model.add(Conv2D(64, (3, 3), padding='same',\n",
+    "                         input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        #model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.3))\n",
+    "\n",
+    "        model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "\n",
+    "        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        #model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        #model.add(BatchNormalization())\n",
+    "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "\n",
+    "        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        #model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(Dropout(0.4))\n",
+    "\n",
+    "        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "        model.add(Dropout(0.5))\n",
+    "\n",
+    "        model.add(Flatten())\n",
+    "        model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))\n",
+    "        model.add(Activation('relu'))\n",
+    "        # model.add(BatchNormalization())\n",
+    "\n",
+    "        model.add(Dropout(0.5))\n",
+    "        model.add(Dense(self.num_classes))\n",
+    "        model.add(Activation('softmax'))\n",
+    "        return model\n",
+    "\n",
+    "\n",
+    "    def normalize(self,X_train,X_test):\n",
+    "        # This function normalizes inputs to zero mean and unit variance.\n",
+    "        # It is used when training a model.\n",
+    "        # Input: training set and test set\n",
+    "        # Output: normalized training set and test set according to the training set statistics.\n",
+    "        mean = np.mean(X_train,axis=(0,1,2,3))\n",
+    "        std = np.std(X_train, axis=(0, 1, 2, 3))\n",
+    "        X_train = (X_train-mean)/(std+1e-7)\n",
+    "        X_test = (X_test-mean)/(std+1e-7)\n",
+    "        return X_train, X_test\n",
+    "\n",
+    "    \n",
+    "    def normalize_production(self,x):\n",
+    "        # This function is used to normalize instances in production according to saved training set statistics.\n",
+    "        # Input: x - a training set\n",
+    "        # Output: x - a normalized training set according to the normalization constants.\n",
+    "\n",
+    "        # These values were produced during the first training run and are general for the standard cifar10 training set normalization.\n",
+    "        mean = 120.707\n",
+    "        std = 64.15\n",
+    "        return (x-mean)/(std+1e-7)\n",
+    "\n",
+    "    \n",
+    "    def predict(self,x,normalize=True,batch_size=50):\n",
+    "        if normalize:\n",
+    "            x = self.normalize_production(x)\n",
+    "        return self.model.predict(x,batch_size)\n",
+    "\n",
+    "    \n",
+    "    def train(self,model):\n",
+    "\n",
+    "        #training parameters\n",
+    "        batch_size = 128\n",
+    "        #maxepoches = 250\n",
+    "        #maxepoches = 250\n",
+    "        maxepoches = 30\n",
+    "        learning_rate = 0.01\n",
+    "        lr_decay = 1e-6\n",
+    "        lr_drop = 20\n",
+    "        # The data, shuffled and split between train and test sets:\n",
+    "        (x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
+    "        x_train = x_train.astype('float32')\n",
+    "        x_test = x_test.astype('float32')\n",
+    "        x_train, x_test = self.normalize(x_train, x_test)\n",
+    "\n",
+    "        y_train = keras.utils.to_categorical(y_train, self.num_classes)\n",
+    "        y_test = keras.utils.to_categorical(y_test, self.num_classes)\n",
+    "\n",
+    "        def lr_scheduler(epoch):\n",
+    "            return learning_rate * (0.5 ** (epoch // lr_drop))\n",
+    "        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)\n",
+    "\n",
+    "        #data augmentation\n",
+    "        datagen = ImageDataGenerator(\n",
+    "            featurewise_center=False,  # set input mean to 0 over the dataset\n",
+    "            samplewise_center=False,  # set each sample mean to 0\n",
+    "            featurewise_std_normalization=False,  # divide inputs by std of the dataset\n",
+    "            samplewise_std_normalization=False,  # divide each input by its std\n",
+    "            zca_whitening=False,  # apply ZCA whitening\n",
+    "            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)\n",
+    "            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)\n",
+    "            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)\n",
+    "            horizontal_flip=True,  # randomly flip images\n",
+    "            vertical_flip=False)  # randomly flip images\n",
+    "        # compute quantities required for featurewise normalization (std, mean, and principal components if ZCA whitening is applied).\n",
+    "        datagen.fit(x_train)\n",
+    "\n",
+    "\n",
+    "\n",
+    "        #optimization details\n",
+    "        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)\n",
+    "        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])\n",
+    "\n",
+    "\n",
+    "        # training process: a single fit_generator call; the LearningRateScheduler callback halves the learning rate every lr_drop (20) epochs.\n",
+    "\n",
+    "        historytemp = model.fit_generator(datagen.flow(x_train, y_train,\n",
+    "                                         batch_size=batch_size),\n",
+    "                            steps_per_epoch=x_train.shape[0] // batch_size,\n",
+    "                            epochs=maxepoches,\n",
+    "                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)\n",
+    "        \n",
+    "        model.save_weights('cifar10vgg.h5')\n",
+    "        return model\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Variable *= will be deprecated. Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n",
+      "Epoch 1/30\n",
+      " - 29s - loss: 4.3002 - acc: 0.1547 - val_loss: 4.2265 - val_acc: 0.1443\n",
+      "Epoch 2/30\n",
+      " - 23s - loss: 3.9048 - acc: 0.2016 - val_loss: 3.9923 - val_acc: 0.1640\n",
+      "Epoch 3/30\n",
+      " - 23s - loss: 3.7052 - acc: 0.2210 - val_loss: 3.8900 - val_acc: 0.1712\n",
+      "Epoch 4/30\n",
+      " - 22s - loss: 3.4890 - acc: 0.2839 - val_loss: 3.4656 - val_acc: 0.2731\n",
+      "Epoch 5/30\n",
+      " - 25s - loss: 3.2615 - acc: 0.3258 - val_loss: 3.1615 - val_acc: 0.3296\n",
+      "Epoch 6/30\n",
+      " - 23s - loss: 3.0814 - acc: 0.3557 - val_loss: 3.0238 - val_acc: 0.3395\n",
+      "Epoch 7/30\n",
+      " - 23s - loss: 2.9114 - acc: 0.3883 - val_loss: 2.8283 - val_acc: 0.4041\n",
+      "Epoch 8/30\n",
+      " - 23s - loss: 2.7543 - acc: 0.4239 - val_loss: 2.5891 - val_acc: 0.4622\n",
+      "Epoch 9/30\n",
+      " - 22s - loss: 2.6056 - acc: 0.4594 - val_loss: 2.5562 - val_acc: 0.4796\n",
+      "Epoch 10/30\n",
+      " - 25s - loss: 2.4590 - acc: 0.5013 - val_loss: 2.2543 - val_acc: 0.5714\n",
+      "Epoch 11/30\n",
+      " - 24s - loss: 2.3017 - acc: 0.5439 - val_loss: 2.1377 - val_acc: 0.5954\n",
+      "Epoch 12/30\n",
+      " - 22s - loss: 2.1519 - acc: 0.5811 - val_loss: 1.9902 - val_acc: 0.6356\n",
+      "Epoch 13/30\n",
+      " - 23s - loss: 2.0276 - acc: 0.6106 - val_loss: 1.8868 - val_acc: 0.6494\n",
+      "Epoch 14/30\n",
+      " - 22s - loss: 1.9065 - acc: 0.6357 - val_loss: 1.7370 - val_acc: 0.6813\n",
+      "Epoch 15/30\n",
+      " - 22s - loss: 1.8081 - acc: 0.6532 - val_loss: 1.7568 - val_acc: 0.6721\n",
+      "Epoch 16/30\n",
+      " - 22s - loss: 1.7116 - acc: 0.6736 - val_loss: 1.6687 - val_acc: 0.6959\n",
+      "Epoch 17/30\n",
+      " - 22s - loss: 1.6337 - acc: 0.6883 - val_loss: 1.4748 - val_acc: 0.7350\n",
+      "Epoch 18/30\n",
+      " - 22s - loss: 1.5609 - acc: 0.7022 - val_loss: 1.4428 - val_acc: 0.7445\n",
+      "Epoch 19/30\n",
+      " - 23s - loss: 1.4890 - acc: 0.7160 - val_loss: 1.3682 - val_acc: 0.7582\n",
+      "Epoch 20/30\n",
+      " - 22s - loss: 1.4291 - acc: 0.7298 - val_loss: 1.3004 - val_acc: 0.7717\n",
+      "Epoch 21/30\n",
+      " - 22s - loss: 1.3169 - acc: 0.7593 - val_loss: 1.2640 - val_acc: 0.7800\n",
+      "Epoch 22/30\n",
+      " - 24s - loss: 1.2843 - acc: 0.7669 - val_loss: 1.2191 - val_acc: 0.7864\n",
+      "Epoch 23/30\n",
+      " - 23s - loss: 1.2431 - acc: 0.7757 - val_loss: 1.1598 - val_acc: 0.7998\n",
+      "Epoch 24/30\n",
+      " - 22s - loss: 1.2166 - acc: 0.7813 - val_loss: 1.1435 - val_acc: 0.8097\n",
+      "Epoch 25/30\n",
+      " - 22s - loss: 1.1872 - acc: 0.7869 - val_loss: 1.1180 - val_acc: 0.8123\n",
+      "Epoch 26/30\n",
+      " - 23s - loss: 1.1634 - acc: 0.7916 - val_loss: 1.0977 - val_acc: 0.8123\n",
+      "Epoch 27/30\n",
+      " - 22s - loss: 1.1300 - acc: 0.7965 - val_loss: 1.0785 - val_acc: 0.8199\n",
+      "Epoch 28/30\n",
+      " - 22s - loss: 1.1169 - acc: 0.7983 - val_loss: 1.0927 - val_acc: 0.8120\n",
+      "Epoch 29/30\n",
+      " - 22s - loss: 1.0944 - acc: 0.8025 - val_loss: 1.0141 - val_acc: 0.8275\n",
+      "Epoch 30/30\n",
+      " - 22s - loss: 1.0686 - acc: 0.8069 - val_loss: 1.0341 - val_acc: 0.8206\n"
+     ]
+    }
+   ],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
+    "\n",
+    "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
+    "test_labels = y_test\n",
+    "train_labels = y_train\n",
+    "x_train = x_train.astype('float32')\n",
+    "x_test = x_test.astype('float32')\n",
+    "\n",
+    "y_train = keras.utils.to_categorical(y_train, 10)\n",
+    "y_test = keras.utils.to_categorical(y_test, 10)\n",
+    "\n",
+    "model = cifar10vgg()\n",
+    "\n",
+    "#predicted_x = model.predict(x_test)\n",
+    "\n",
+    "#norm_test = model.normalize_production(x_test)\n",
+    "\n",
+    "# Normalizing train data before dumping\n",
+    "#x_train, x_test = model.normalize(x_train, x_test)\n",
+    "#x_train = model.normalize_production(x_train)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time: 1.3477363586425781\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "start = time.time()\n",
+    "result = model.predict(x_test[:8000])\n",
+    "print(\"time:\", time.time() - start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras2onnx\n",
+    "onnx_model = keras2onnx.convert_keras(model.model, model.model.name, target_opset=10)\n",
+    "import onnx\n",
+    "onnx.save(onnx_model, \"../models/keras/vgg16_cifar10.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "with open('dumps/vgg16_keras_dump', 'wb') as fp:\n",
+    "    pickle.dump(result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/vgg16_onnx.ipynb b/hpvm/projects/onnx/src/vgg16_onnx.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..9d0453cfb754a488d06a00afa78383ff9269e326
--- /dev/null
+++ b/hpvm/projects/onnx/src/vgg16_onnx.ipynb
@@ -0,0 +1,158 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "import keras\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import keras2onnx\n",
+    "import onnx\n",
+    "import onnxruntime\n",
+    "import time\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
+    "test_labels = y_test\n",
+    "train_labels = y_train\n",
+    "x_train = x_train.astype('float32')\n",
+    "x_test = x_test.astype('float32')\n",
+    "\n",
+    "y_train = keras.utils.to_categorical(y_train, 10)\n",
+    "y_test = keras.utils.to_categorical(y_test, 10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input name  : conv2d_1_input\n",
+      "Input shape : [None, 3, 32, 32]\n",
+      "Input type  : tensor(float)\n",
+      "Output name  : activation_15\n",
+      "Output shape : [None, 10]\n",
+      "Output type  : tensor(float)\n"
+     ]
+    }
+   ],
+   "source": [
+    "sess = onnxruntime.InferenceSession(\"../models/keras/vgg16_cifar10.onnx\")\n",
+    "input_name = sess.get_inputs()[0].name\n",
+    "print(\"Input name  :\", input_name)\n",
+    "input_shape = sess.get_inputs()[0].shape\n",
+    "print(\"Input shape :\", input_shape)\n",
+    "input_type = sess.get_inputs()[0].type\n",
+    "print(\"Input type  :\", input_type)\n",
+    "output_name = sess.get_outputs()[0].name\n",
+    "print(\"Output name  :\", output_name)  \n",
+    "output_shape = sess.get_outputs()[0].shape\n",
+    "print(\"Output shape :\", output_shape)\n",
+    "output_type = sess.get_outputs()[0].type\n",
+    "print(\"Output type  :\", output_type)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "time:  1.4347426891326904\n"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n",
+    "print(\"time: \", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "with open('dumps/vgg16_ort_dump', 'wb') as fp:\n",
+    "    pickle.dump(ort_result, fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/vgg16_ort_dump', 'rb') as fp:\n",
+    "    ort_res = pickle.load(fp)\n",
+    "with open ('dumps/vgg16_keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(ort_res[0], keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 1)  # compared to 1 decimal place; earlier note claimed decimal=3 would pass (TODO confirm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hpvm/projects/onnx/src/vgg16_tvm.ipynb b/hpvm/projects/onnx/src/vgg16_tvm.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8cea48d44615e3d407d25b943df6dbb86923a2d0
--- /dev/null
+++ b/hpvm/projects/onnx/src/vgg16_tvm.ipynb
@@ -0,0 +1,240 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from keras.datasets import cifar10\n",
+    "from keras import backend as K\n",
+    "import sys\n",
+    "import struct\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import tvm\n",
+    "import tvm.relay as relay\n",
+    "from tvm.contrib import graph_runtime\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "K.set_image_data_format('channels_first')\n",
+    "\n",
+    "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
+    "x_test = x_test.astype('float32')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(X_test.shape)\n",
+    "#X_test = X_test[:8000]\n",
+    "#print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import onnx\n",
+    "onnx_model = onnx.load(\"../models/keras/vgg16_cifar10.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target='cuda -libs=cudnn,cublas'\n",
+    "input_name = 'conv2d_1_input'\n",
+    "shape_dict = {input_name: x_test[:1000].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "ctx = tvm.gpu()\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    executor = relay.build_module.create_executor('graph', mod, ctx, target)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# LLVM EXECUTE SUCCEEDED\n",
+    "import time\n",
+    "start_time = time.time()\n",
+    "tvm_out = executor.evaluate()(tvm.nd.array(x_test.astype('float32')[:1000]), **params)\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n",
+    "import pickle\n",
+    "with open('dumps/tvm_dump', 'wb') as fp:\n",
+    "    pickle.dump(tvm_out.asnumpy(), fp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n",
+      "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (10, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (512, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 2, 2, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 4, 4, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 4, 4, 'float32'), (512, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 8, 8, 'float32'), (256, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 16, 16, 'float32'), (128, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 16, 16, 'float32'), (128, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 32, 32, 'float32'), (64, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n",
+      "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 3, 32, 32, 'float32'), (64, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_name = 'conv2d_1_input'\n",
+    "input_size = 6000\n",
+    "shape_dict = {input_name: x_test[:input_size].shape}\n",
+    "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
+    "target = 'cuda -libs=cudnn,cublas'\n",
+    "with relay.build_config(opt_level=3):\n",
+    "    graph, lib, params = relay.build(mod, target, params=params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "ctx = tvm.gpu()\n",
+    "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n",
+    "# create module\n",
+    "module = graph_runtime.create(graph, lib, ctx)\n",
+    "# set input and parameters\n",
+    "module.set_input(\"conv2d_1_input\", x_test[:input_size])\n",
+    "module.set_input(**params)\n",
+    "# run\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Time: 0.2862420082092285\n"
+     ]
+    }
+   ],
+   "source": [
+    "start_time = time.time()\n",
+    "module.run()\n",
+    "out_shape = (input_size, 10)\n",
+    "# get output\n",
+    "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n",
+    "print(\"Time:\", time.time() - start_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "with open ('dumps/tvm_dump', 'rb') as fp:\n",
+    "    tvm_res = pickle.load(fp)\n",
+    "with open ('dumps/vgg16_keras_dump', 'rb') as fp:\n",
+    "    keras_res = pickle.load(fp)\n",
+    "\n",
+    "for ref_o, o in zip(tvm_res, keras_res):\n",
+    "    np.testing.assert_almost_equal(ref_o, o, 3)  #using decimal of 3 would pass test\n",
+    "print(\"Accuracy matched!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}